In [365]:
from pathlib import Path
import brickschema
import numpy as np
import pandas as pd
import rdflib
from rdflib import Namespace
from rdflib.namespace import RDFS, SKOS, BRICK


In [366]:
SENAPS = Namespace("http://senaps.io/schema/1.0/senaps#")
SENAPS['stream_id']

rdflib.term.URIRef('http://senaps.io/schema/1.0/senaps#stream_id')

In [367]:
SKOS['definition']

rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#definition')

In [368]:
SKOS

Namespace('http://www.w3.org/2004/02/skos/core#')

In [369]:
dataset_dir = '../../datasets/bts_site_b_train/'

dataset_zip = 'train.zip'
dataset_path = Path(dataset_dir) / dataset_zip

mapping_csv = 'mapper_TrainOnly.csv'
mapping_path = Path(dataset_dir) / mapping_csv

# building_ttl = 'Site_B_tim.ttl'
building_ttl = 'Site_B.ttl'
building_model = Path(dataset_dir) / building_ttl

brick_ttl = 'Brick_v1.2.1.ttl'
brick_schema = Path(dataset_dir) / brick_ttl

In [370]:
g_building = brickschema.Graph().load_file(building_model)
g_brick = brickschema.Graph().load_file(brick_schema)
# g_brick_latest = brickschema.Graph(load_brick=True)
g_brick_latest = brickschema.Graph(load_brick_nightly=True)

In [371]:
def sparql_to_df(g, q, **kwargs):
    res = g.query(q, **kwargs)
    df = pd.DataFrame(res.bindings)
    # are these necessary?
    df.columns = df.columns.map(str)
    # df = df.map(str)
    df.drop_duplicates(inplace=True)
    return df

In [372]:
# Get all brick entities and their classes in the building model
def get_brick_entities(g):
    q = '''
    SELECT ?entity ?brick_class ?stream_id ?named_unit ?anonymous_unit WHERE {
        ?entity a ?brick_class .
        OPTIONAL { ?entity senaps:stream_id ?stream_id } .
        OPTIONAL { ?entity brick:hasUnit ?named_unit .
                    filter ( strstarts(str(?named_unit),str(unit:)) ) } .
        OPTIONAL { ?entity brick:hasUnit [ brick:value ?anonymous_unit ] } .
        filter ( strstarts(str(?brick_class),str(brick:)) ) .
    }
    '''
    # q = '''
    # SELECT ?entity ?brick_class ?stream_id WHERE {
    #     ?entity a ?brick_class .
    #     OPTIONAL { ?entity senaps:stream_id ?stream_id } .
    #     filter ( strstarts(str(?brick_class),str(brick:)) ) .
    # }
    # '''
    return sparql_to_df(g, q)

get_brick_entities(g_building)

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,f9833afd_e8a3_437b_9031_f29b656c94f9
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e
...,...,...,...,...,...
1110,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,
1111,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,
1112,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,
1113,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,


In [373]:
df = get_brick_entities(g_building)

In [374]:
def class_in_brick(cls, g):
    return (cls, None, None) in g

In [375]:
df['class_in_provided_brick'] = df['brick_class'].apply(class_in_brick, args=(g_brick,))
print(len(df[df['class_in_provided_brick'] == True]), 'recognised by provided Brick schema:')
print(df[df['class_in_provided_brick'] == True].head())
print(len(df[df['class_in_provided_brick'] == False]), 'not recognised by provided Brick schema:')
print(df[df['class_in_provided_brick'] == False].head())

1060 recognised by provided Brick schema:
  anonymous_unit                                        brick_class  \
0            NaN  https://brickschema.org/schema/Brick#Temperatu...   
1            NaN  https://brickschema.org/schema/Brick#Temperatu...   
2            NaN  https://brickschema.org/schema/Brick#Temperatu...   
3            NaN  https://brickschema.org/schema/Brick#Temperatu...   
4            NaN  https://brickschema.org/schema/Brick#Temperatu...   

                                              entity named_unit  \
0  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
1  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
2  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
3  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
4  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   

                              stream_id  class_in_provided_brick  
0  3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4                     T

In [376]:
df['class_in_latest_brick'] = df['brick_class'].apply(class_in_brick, args=(g_brick_latest,))
print(len(df[df['class_in_latest_brick'] == True]), 'recognised by provided Brick schema:')
print(df[df['class_in_latest_brick'] == True].head())
print(len(df[df['class_in_latest_brick'] == False]), 'not recognised by provided Brick schema:')
print(df[df['class_in_latest_brick'] == False].head())

1021 recognised by provided Brick schema:
  anonymous_unit                                        brick_class  \
0            NaN  https://brickschema.org/schema/Brick#Temperatu...   
1            NaN  https://brickschema.org/schema/Brick#Temperatu...   
2            NaN  https://brickschema.org/schema/Brick#Temperatu...   
3            NaN  https://brickschema.org/schema/Brick#Temperatu...   
4            NaN  https://brickschema.org/schema/Brick#Temperatu...   

                                              entity named_unit  \
0  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
1  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
2  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
3  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   
4  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...        NaN   

                              stream_id  class_in_provided_brick  \
0  3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4                     

In [377]:
def get_brick_definition(cls, g, g_alt=None):
    if cls is None:
        return None
    
    # predicate = rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#definition')
    predicate = SKOS['definition']
    definition = g.value(subject=cls, predicate=predicate)
    
    original_cls = cls
    while definition is None:
        cls = g.value(subject=cls, predicate=RDFS['subClassOf'])
        # print(cls)
        if cls is None:
            break
        definition = g.value(subject=cls, predicate=predicate)
    
    if definition is None and g_alt is not None:
        return get_brick_definition(original_cls, g_alt)
    
    return g.value(subject=cls, predicate=predicate)

In [378]:
df['brick_definition'] = df['brick_class'].apply(get_brick_definition, args=(g_brick,))
# df['brick_definition'] = df['class'].apply(get_brick_definition, args=(g_brick_latest,))
# df['brick_definition'] = df['class'].apply(get_brick_definition, args=(g_brick, g_brick_latest))
df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature


In [379]:
if 'named_unit' not in df.columns:
    df['named_unit'] = None
if 'anonymous_unit' not in df.columns:
    df['anonymous_unit'] = None


df = df.assign(unit=lambda x: x['named_unit'].combine_first(x['anonymous_unit']))

# def unit_in_brick(unit, g):
#     return (unit, None, None) in g


# df['unit_in_provided_brick'] = df['named_unit'].apply(class_in_brick, args=(g_brick,))

In [380]:
def unit_is_named(r):
    if pd.isna(r.unit):
        return None
    
    return not pd.isna(r.named_unit)

In [381]:
df['unit_is_named'] = df.apply(unit_is_named, axis=1)
df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,


In [382]:
# df.assign(unit_is_named=lambda x: x['unit'].apply(lambda u: u is not None and u.startswith('unit:')))   

In [383]:
# Load the mapping file
mapping_df = pd.read_csv(mapping_path, index_col=0)

# Building B only
# mapping_df = mapping_df[mapping_df['Building'] == 'B']

# Ignore streams not saved to file
mapping_df = mapping_df[mapping_df['Filename'].str.contains('FILE NOT SAVED') == False]

mapping_df.head()

Unnamed: 0,Building,StreamID,Filename,strBrickLabel
0,A,9ba955fa_5960_4c9b_b73a_10156da7d083,trainAll_0.pkl,Operating_Mode_Status
2,A,8fd6e75b_88bc_4992_b420_77389969b3c4,trainAll_1.pkl,Mode_Command
3,A,8db6eaa9_bd6c_4f7e_aed0_a47e4e192a6c,trainAll_2.pkl,Active_Power_Sensor
4,A,b2338dec_110a_45cc_8358_1171aaef2c45,trainAll_3.pkl,System_Status
5,A,ec5ff874_0af2_49d8_a6a0_21ea3d077dc8,trainAll_4.pkl,Maintenance_Mode_Command


In [384]:
df['stream_id'][0]

rdflib.term.Literal('3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4')

In [385]:
def stream_exists_in_mapping(s, mapping_df):
    if pd.isna(s):
        return None
    return str(s).strip() in mapping_df['StreamID'].values

In [386]:
df['stream_exists_in_mapping'] = df['stream_id'].apply(stream_exists_in_mapping, args=(mapping_df,))
df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named,stream_exists_in_mapping
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,True
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,True
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,True
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,True
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,True


In [387]:
def brick_class_in_mapping(s, mapping_df):
    if pd.isna(s):
        return None
    mapping_df['StreamID']
    return str(s).strip() in mapping_df['StreamID'].values

In [388]:
# Convert df['stream_id'] to string for the join
df['stream_id_str'] = df['stream_id'].apply(lambda x: str(x))

# Perform the left join
df = pd.merge(df, mapping_df[['StreamID', 'strBrickLabel']], how='left', left_on='stream_id_str', right_on='StreamID')

# Optionally drop the temporary column 'stream_id_str' and 'StreamID' after the merge
df = df.drop(columns=['stream_id_str', 'StreamID'])
df.rename(columns={'strBrickLabel': 'brick_class_in_mapping'}, inplace=True)

df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named,stream_exists_in_mapping,brick_class_in_mapping
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,True,Temperature_Setpoint
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,True,Temperature_Setpoint
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,True,Temperature_Setpoint
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,True,Temperature_Setpoint
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,True,Temperature_Setpoint


In [389]:
df['brick_class_fragment'] = df['brick_class'].apply(lambda x: str(x.fragment) if x is not None else None)

df['brick_class_is_consistent'] = np.where(
    pd.isna(df['brick_class_in_mapping']),  # Check if brick_class_in_mapping is empty
    None,  # Leave empty where there's no mapping value
    df['brick_class_fragment'] == df['brick_class_in_mapping']  # Compare fragment with the mapping
)

df.drop(columns=['brick_class_fragment'], inplace=True)

df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named,stream_exists_in_mapping,brick_class_in_mapping,brick_class_is_consistent
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,True,Temperature_Setpoint,True
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,True,Temperature_Setpoint,True
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,True,Temperature_Setpoint,True
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,True,Temperature_Setpoint,True
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,True,Temperature_Setpoint,True


In [390]:
def defrag_uri(s):
    if isinstance(s, rdflib.term.URIRef):
        if '#' in s:
            return s.fragment
        elif '/' in s:
            return s.split('/')[-1]
    return s

In [391]:
for col in df.columns:
    df[col] = df[col].apply(defrag_uri)

In [392]:
df.to_csv('model_quality.csv', index=False)

---
# VISUALISATION

In [393]:
# import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [395]:
entity_in_provided_brick = df[['brick_class', 'entity', 'class_in_provided_brick']].copy()
entity_in_provided_brick.sort_values(by=['class_in_provided_brick', 'brick_class', 'entity'], inplace=True)
entity_in_provided_brick.head()

Unnamed: 0,brick_class,entity,class_in_provided_brick
957,Electrical_Circuit,4e5f09a8_0b2e_4c60_a6da_f7d332d6808e,False
960,Electrical_Circuit,4f525a1e_69b2_4fff_be62_16b262d3724b,False
956,Electrical_Circuit,6dc225a8_753e_4e47_8528_a3c03ea4e496,False
961,Electrical_Circuit,a4bcf16d_a84e_414e_b9c1_603c3d10cb3e,False
959,Electrical_Circuit,d8d2f843_6cc8_4b62_892e_5e67b56be41c,False


In [414]:
to_plot = entity_in_provided_brick[['brick_class', 'class_in_provided_brick']].groupby('class_in_provided_brick').count()
to_plot.reset_index(inplace=True)
to_plot['class_in_provided_brick'] = to_plot['class_in_provided_brick'].apply(lambda x: 'Recognised' if x else 'Unrecognised')
to_plot.head()

Unnamed: 0,class_in_provided_brick,brick_class
0,Unrecognised,55
1,Recognised,1060


In [417]:
import plotly.express as px
fig = px.pie(to_plot, values='brick_class', names='class_in_provided_brick',
             title='Proportion of Brick Entities Recognised by Provided Brick Schema',
            #  hover_data={'brick_class': True, 'class_in_provided_brick': False}, 
            #  hover_data={'brick_class': True, 'class_in_provided_brick': False}, 
            #  labels={'brick_class':'Number of Entities'})
             labels={'class_in_provided_brick': 'Class', 'brick_class':'Number of Entities'})
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [427]:
to_plot = entity_in_provided_brick[['brick_class', 'class_in_provided_brick']].copy()
# to_plot.reset_index(inplace=True)
to_plot['class_in_provided_brick'] = to_plot['class_in_provided_brick'].apply(lambda x: 'Recognised' if x else 'Unrecognised')
to_plot.head()

Unnamed: 0,brick_class,class_in_provided_brick
957,Electrical_Circuit,Unrecognised
960,Electrical_Circuit,Unrecognised
956,Electrical_Circuit,Unrecognised
961,Electrical_Circuit,Unrecognised
959,Electrical_Circuit,Unrecognised


In [434]:
fig = make_subplots(
    rows=2, cols=1,
    # shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "pie"}],
           [{"type": "table"}]]
)

labels = to_plot['class_in_provided_brick'].value_counts().index
values = to_plot['class_in_provided_brick'].value_counts().values

fig.add_trace(
    go.Pie(#to_plot, 
        #    values='brick_class', 
            labels=labels,
              values=values,
              textposition='inside', 
              textinfo='percent+label',
              name="",
        #    names='class_in_provided_brick',
        #    title='Proportion of Brick Entities Recognised by Provided Brick Schema',
        #    labels={'class_in_provided_brick': 'Class', 'brick_class':'Number of Entities'}
    ),
    row=1, col=1
)

fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Entity ID", "Class Recognised"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            # values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns[1:]],
            values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns],
            align = "left")
    ),
    row=2, col=1
)

fig.update_layout(
    height=800,
    showlegend=False,
    title_text="Brick Entities in Building Model Recognised by Brick Schema",
)

fig.show()


In [448]:
fig = make_subplots(
    rows=3, cols=1,
    # shared_xaxes=True,
    # vertical_spacing=0.1,
    vertical_spacing=0.03,
    subplot_titles = ['Proportion of Entities', 'Recognised Entities', 'Unrecognised Entities'],
    specs=[[{"type": "pie"}],
           [{"type": "table"}],
           [{"type": "table"}]]
)

labels = to_plot['class_in_provided_brick'].value_counts().index
values = to_plot['class_in_provided_brick'].value_counts().values

fig.add_trace(
    go.Pie(#to_plot, 
        #    values='brick_class', 
            labels=labels,
              values=values,
              textposition='inside', 
              textinfo='percent+label',
              name="",
        #    names='class_in_provided_brick',
        #    title='Proportion of Brick Entities Recognised by Provided Brick Schema',
        #    labels={'class_in_provided_brick': 'Class', 'brick_class':'Number of Entities'}
    ),
    row=1, col=1
)

entity_in_provided_brick_true = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == True]
entity_in_provided_brick_false = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == False]

fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Entity ID"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            # values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns[1:]],
            values=[entity_in_provided_brick_true[k].tolist() for k in entity_in_provided_brick_true.columns[:2]],
            align = "left")
    ),
    row=2, col=1
)

fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Entity ID"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            # values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns[1:]],
            values=[entity_in_provided_brick_false[k].tolist() for k in entity_in_provided_brick_false.columns[:2]],
            align = "left")
    ),
    row=3, col=1,
)


fig.update_layout(
    height=1200,
    showlegend=False,
    title_text="Brick Entities in Building Model Recognised by Brick Schema",
)

fig.show()

In [459]:
fig = make_subplots(
    rows=3, cols=2,
    # shared_xaxes=True,
    # vertical_spacing=0.1,
    vertical_spacing=0.03,
    subplot_titles = ['Proportion of Entities', 'Unrecognised by Class', 'Unrecognised Entities', 'Recognised Entities'],
    specs=[[{"type": "pie"}, {"type": "pie"}],
           [{"type": "table", 'colspan': 2}, None],
           [{"type": "table", 'colspan': 2}, None]]
)

labels = to_plot['class_in_provided_brick'].value_counts().index
values = to_plot['class_in_provided_brick'].value_counts().values

fig.add_trace(
    go.Pie(#to_plot, 
        #    values='brick_class', 
            labels=labels,
              values=values,
              textposition='inside', 
              textinfo='percent+label',
              name="",
        #    names='class_in_provided_brick',
        #    title='Proportion of Brick Entities Recognised by Provided Brick Schema',
        #    labels={'class_in_provided_brick': 'Class', 'brick_class':'Number of Entities'}
    ),
    row=1, col=1
)

entity_in_provided_brick_true = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == True]
entity_in_provided_brick_false = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == False]

labels = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == False]['brick_class'].value_counts().index
values = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == False]['brick_class'].value_counts().values

fig.add_trace(
    go.Pie(#to_plot, 
        #    values='brick_class', 
            labels=labels,
              values=values,
              textposition='inside', 
              textinfo='percent+label',
            #   textinfo='value+label',
              name="",
        #    names='class_in_provided_brick',
        #    title='Proportion of Brick Entities Recognised by Provided Brick Schema',
        #    labels={'class_in_provided_brick': 'Class', 'brick_class':'Number of Entities'}
    ),
    row=1, col=2
)

fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Entity ID"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            # values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns[1:]],
            values=[entity_in_provided_brick_false[k].tolist() for k in entity_in_provided_brick_false.columns[:2]],
            align = "left")
    ),
    row=2, col=1,
)

fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Entity ID"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            # values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns[1:]],
            values=[entity_in_provided_brick_true[k].tolist() for k in entity_in_provided_brick_true.columns[:2]],
            align = "left")
    ),
    row=3, col=1
)

fig.update_layout(
    height=1200,
    showlegend=False,
    title_text="Brick Entities in Building Model Recognised by Brick Schema",
    title_x=0.5,
)

fig.show()

In [460]:
fig.write_html("brick_entities_recognised_by_schema.html")

---

# SCRATCH

In [239]:
# streams with blank node units
q = '''
SELECT ?a ?b
WHERE {
    ?a brick:hasUnit [ brick:value ?b ] .
}
'''
sparql_to_df(g_building, q)

Unnamed: 0,a,b
0,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,A
1,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,A
2,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,A
3,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,A
4,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,A
...,...,...
115,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,V
116,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,V
117,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,V
118,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,V


In [240]:
# streams with blank node units
q = '''
SELECT ?a ?b
WHERE {
    ?a brick:hasUnit ?b .
    ?b rdf:type/rdfs:subClassOf* unit: .
}
'''
sparql_to_df(g_building, q)

In [241]:
# streams with proper units
q = '''
SELECT ?a ?b
WHERE {
    ?a brick:hasUnit ?b .
    filter ( strstarts(str(?b),str(unit:)) ) .
}
'''
sparql_to_df(g_building, q)

Unnamed: 0,a,b
0,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,http://qudt.org/vocab/unit/KiloW


In [242]:
for ns_prefix, namespace in g_building.namespaces():
    print(f'{ns_prefix}: {namespace}')

brick: https://brickschema.org/schema/Brick#
csvw: http://www.w3.org/ns/csvw#
dc: http://purl.org/dc/elements/1.1/
dcat: http://www.w3.org/ns/dcat#
dcmitype: http://purl.org/dc/dcmitype/
dcterms: http://purl.org/dc/terms/
dcam: http://purl.org/dc/dcam/
doap: http://usefulinc.com/ns/doap#
foaf: http://xmlns.com/foaf/0.1/
geo: http://www.opengis.net/ont/geosparql#
odrl: http://www.w3.org/ns/odrl/2/
org: http://www.w3.org/ns/org#
prof: http://www.w3.org/ns/dx/prof/
prov: http://www.w3.org/ns/prov#
qb: http://purl.org/linked-data/cube#
schema: https://schema.org/
sh: http://www.w3.org/ns/shacl#
skos: http://www.w3.org/2004/02/skos/core#
sosa: http://www.w3.org/ns/sosa/
ssn: http://www.w3.org/ns/ssn/
time: http://www.w3.org/2006/time#
vann: http://purl.org/vocab/vann/
void: http://rdfs.org/ns/void#
wgs: https://www.w3.org/2003/01/geo/wgs84_pos#
owl: http://www.w3.org/2002/07/owl#
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
rdfs: http://www.w3.org/2000/01/rdf-schema#
xsd: http://www.w3.

In [245]:
df.to_csv('model_quality_defrag.csv', index=False)

In [246]:
def get_anonymous_units(s, g):
    q = '''
    SELECT ?units
    WHERE {
        ?entity brick:hasUnit [ brick:value ?units ] .
    }
    '''
    return sparql_to_df(g, q, initBindings={'entity': s})

df['anonymous_units'] = df['entity'].apply(get_anonymous_units, args=(g_building,))

df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named,stream_exists_in_mapping,stream_exists_in_mapping_2,anonymous_units
0,,Temperature_Setpoint,da5873de_e112_4f9b_9e0f_149bd1221894.8084c259_...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: []
1,,Temperature_Setpoint,124295e4_f6a9_4904_baa2_6bb2752d0059.859193de_...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: []
2,,Temperature_Setpoint,5953d8e3_565b_4b29_a498_5ef50f9aefbe.c9640af1_...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: []
3,,Temperature_Setpoint,554f7ba1_7232_4a12_a2b6_0954976ff3b9.a8a33c70_...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: []
4,,Temperature_Setpoint,951d1945_98d0_4329_9025_dce55687f3a4.7ea99eda_...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: []


In [247]:
def get_named_units(s, g):
    # streams with proper units
    q = '''
    SELECT ?named_units
    WHERE {
        ?entity brick:hasUnit ?named_units .
        # filter ( strstarts(str(?b),str(unit:)) ) .
    }
    '''
    return sparql_to_df(g, q, initBindings={'entity': s})

df['named_units'] = df['entity'].apply(get_named_units, args=(g_building,))

df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named,stream_exists_in_mapping,stream_exists_in_mapping_2,anonymous_units,named_units
0,,Temperature_Setpoint,da5873de_e112_4f9b_9e0f_149bd1221894.8084c259_...,,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
1,,Temperature_Setpoint,124295e4_f6a9_4904_baa2_6bb2752d0059.859193de_...,,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
2,,Temperature_Setpoint,5953d8e3_565b_4b29_a498_5ef50f9aefbe.c9640af1_...,,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
3,,Temperature_Setpoint,554f7ba1_7232_4a12_a2b6_0954976ff3b9.a8a33c70_...,,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
4,,Temperature_Setpoint,951d1945_98d0_4329_9025_dce55687f3a4.7ea99eda_...,,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,True,True,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []


In [236]:
# df['stream_exists_in_mapping'] = np.where(df['stream_id'].isna(), np.nan, df['stream_id'].str.isin(mapping_df['StreamID']))

df['stream_exists_in_mapping'] = np.where(
    df['stream_id'].apply(lambda x: pd.isna(str(x).strip())), pd.NA,  # Handle empty Literal
    df['stream_id'].apply(lambda x: str(x)).isin(mapping_df['StreamID'])  # Convert Literal to string and check
)
# df['stream_exists_in_mapping'] = df['stream_exists_in_mapping'].astype('boolean')
df['stream_exists_in_mapping'] = df['stream_exists_in_mapping'].apply(
    lambda x: pd.NA if pd.isna(x) else bool(x)
)

df.head()
# df[('stream_id' == '')]
# df[df['stream_id'] != '']
df[pd.isna(df['stream_id'])]
# for val in df['stream_id']:
#     print(val)

Unnamed: 0,anonymous_unit,brick_class,entity,named_unit,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,unit,unit_is_named,stream_exists_in_mapping
661,,https://brickschema.org/schema/Brick#Supply_Fan,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Fan moving supply air -- air that is supplied ...,,,False
662,,https://brickschema.org/schema/Brick#Supply_Fan,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Fan moving supply air -- air that is supplied ...,,,False
663,,https://brickschema.org/schema/Brick#Supply_Fan,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Fan moving supply air -- air that is supplied ...,,,False
664,,https://brickschema.org/schema/Brick#Supply_Fan,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Fan moving supply air -- air that is supplied ...,,,False
665,,https://brickschema.org/schema/Brick#Supply_Fan,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Fan moving supply air -- air that is supplied ...,,,False
...,...,...,...,...,...,...,...,...,...,...,...
1110,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Assembly consisting of sections containing a f...,,,False
1111,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Assembly consisting of sections containing a f...,,,False
1112,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Assembly consisting of sections containing a f...,,,False
1113,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,,,True,True,Assembly consisting of sections containing a f...,,,False
