# create_metadata

This notebook creates the CSVW metadata file for the 'observation.csv' CSV file.

## Setup

Imports packages.

Information on the `csvw_functions` package is available here: https://github.com/stevenkfirth/csvw_functions

In [1]:
import csvw_functions
import json
import rdflib

## Get embedded metadata

Reads the CSV file and extracts the information from the column headings to form an initial CSVW metadata document.

In [2]:
metadata_table_dict=\
    csvw_functions.get_embedded_metadata(
        'observation.csv',
        relative_path=True  # sets the 'url' table property to a path relative to the current working directory.
)
metadata_table_dict

{'@context': 'http://www.w3.org/ns/csvw',
 'tableSchema': {'columns': [{'titles': {'und': ['id']}, 'name': 'id'},
   {'titles': {'und': ['type']}, 'name': 'type'},
   {'titles': {'und': ['resultTime']}, 'name': 'resultTime'},
   {'titles': {'und': ['usedProcedure']}, 'name': 'usedProcedure'},
   {'titles': {'und': ['madeBySensor']}, 'name': 'madeBySensor'},
   {'titles': {'und': ['observedProperty']}, 'name': 'observedProperty'},
   {'titles': {'und': ['hasFeatureOfInterest']},
    'name': 'hasFeatureOfInterest'},
   {'titles': {'und': ['hasSimpleResult']}, 'name': 'hasSimpleResult'},
   {'titles': {'und': ['hasResult_value']}, 'name': 'hasResult_value'},
   {'titles': {'und': ['hasResult_unit']}, 'name': 'hasResult_unit'}]},
 'url': 'observation.csv'}

## Add new information to the metadata document

This section adds additional information to create a complete metadata document.

### add column descriptions, datatypes and units

Adds additional information to each column.

In [3]:
url='https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#'

data={
    'id':{
        'datatype':'string',
        'suppressOutput': True
    },
    'type':{
        'datatype':'string',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'rdf:type',
        "valueUrl": 'sosa:Observation'
    },
    'resultTime':{
        'datatype':'datetime',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:resultTime',
    },
    'usedProcedure':{
        'datatype':'string',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:usedProdecure',
        "valueUrl": url+'{usedProcedure}'
    },
    'madeBySensor':{
        'datatype':'string',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:madeBySensor',
        "valueUrl": url+'{madeBySensor}'
    },
    'observedProperty':{
        'datatype':'string',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:observedProperty',
        "valueUrl": url+'{observedProperty}'
    },
    'hasFeatureOfInterest':{
        'datatype':'string',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:hasFeatureOfInterest',
        "valueUrl": url+'{hasFeatureOfInterest}'
    },
    'hasSimpleResult':{
        'datatype':'string',
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:hasSimpleResult',
    },
    'hasResult_value':{
        'datatype':'number',
        "aboutUrl": url+'{id}_Result',
        "propertyUrl": 'http://qudt.org/schema/qudt/value',
    },
    'hasResult_unit':{
        'datatype':'string',
        "aboutUrl": url+'{id}_Result',
        "propertyUrl": 'http://qudt.org/schema/qudt/unit',
        "valueUrl": 'http://qudt.org/vocab/unit/{hasResult_unit}'
    }
}
for col_dict in metadata_table_dict['tableSchema']['columns']:
    for k,v in data[col_dict['name']].items():
        col_dict[k]=v
        
metadata_table_dict['tableSchema']['columns'].append(
    {
        'name': 'result_virtual_column',
        'virtual': True,
        "aboutUrl": url+'{id}',
        "propertyUrl": 'sosa:hasResult',
        "valueUrl": url+'{id}_Result'
    }
)

metadata_table_dict['tableSchema']['columns'].append(
    {
        'name': 'result_type_virtual_column',
        'virtual': True,
        "aboutUrl": url+'{id}_Result',
        "propertyUrl": 'rdf:type',
        "valueUrl": 'sosa:Result'
    }
)
        
metadata_table_dict

{'@context': 'http://www.w3.org/ns/csvw',
 'tableSchema': {'columns': [{'titles': {'und': ['id']},
    'name': 'id',
    'datatype': 'string',
    'suppressOutput': True},
   {'titles': {'und': ['type']},
    'name': 'type',
    'datatype': 'string',
    'aboutUrl': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#{id}',
    'propertyUrl': 'rdf:type',
    'valueUrl': 'sosa:Observation'},
   {'titles': {'und': ['resultTime']},
    'name': 'resultTime',
    'datatype': 'datetime',
    'aboutUrl': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#{id}',
    'propertyUrl': 'sosa:resultTime'},
   {'titles': {'und': ['usedProcedure']},
    'name': 'usedProcedure',
    'datatype': 'string',
    'aboutUrl': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#{id}',
    'propertyUrl': 'sosa:usedProdecure',
    'valueUrl': 'https://github.com/

## Save the newly created metadata table object

In [4]:
with open('observation.csv-metadata.json','w') as f:
    json.dump(metadata_table_dict,f,indent=4)

## Testing

To test the newly created metadata file, we can use the `csvw_functions` package to create an annotated table group object and chaeck for errors. We can also convert the data to JSON-LD to check that this process works fine.


In [5]:
annotated_table_group_dict=csvw_functions.create_annotated_table_group(
    'observation.csv-metadata.json'
)

*(No runtime errors)*

In [6]:
csvw_functions.get_errors(annotated_table_group_dict)

[]

*(No errors stored in the annotated table group object)*

In [7]:
json_ld=csvw_functions.create_json_ld(
    annotated_table_group_dict,
    mode='minimal'
)
json_ld[0:2]

[{'@id': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Observation0',
  '@type': 'sosa:Observation',
  'sosa:resultTime': '2013-10-02T06:00:00',
  'sosa:usedProdecure': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Procedure1',
  'sosa:madeBySensor': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Sensor1_Hobo_U12',
  'sosa:observedProperty': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#AirTemperature',
  'sosa:hasFeatureOfInterest': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#ABCE_atrium',
  'sosa:hasResult': {'@id': 'https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Observation0_Result',
   'http://qudt.org/schema/qudt/value': 19.865,
   'http://qudt.org/sch

*(No runtime errors. Conversion looks fine.)*

In [8]:
rdf_ntriples=csvw_functions.create_rdf(
    annotated_table_group_dict,
    mode='minimal'
)
print(rdf_ntriples[:1000])

<https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Observation0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/sosa/Observation> .
<https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Observation0> <http://www.w3.org/ns/sosa/resultTime> "2013-10-02T06:00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Observation0> <http://www.w3.org/ns/sosa/usedProdecure> <https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Procedure1> .
<https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Observation0> <http://www.w3.org/ns/sosa/madeBySensor> <https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#Sensor1_Hobo_U12> .
<https://github

In [9]:
g=rdflib.Graph().parse(data=rdf_ntriples,format='ntriples')
g

<Graph identifier=N1e36bc5123a6472194eb0d5a4d1b4c87 (<class 'rdflib.graph.Graph'>)>

In [10]:
g.bind('sensordata',rdflib.URIRef('https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#'))
g.bind('sosa',rdflib.URIRef('http://www.w3.org/ns/sosa/'))
g.bind('qudt-unit',rdflib.URIRef('http://qudt.org/vocab/unit/'))
g.bind('qudt',rdflib.URIRef('http://qudt.org/schema/qudt/'))
print(g.serialize(format='ttl')[:1000])

@prefix qudt: <http://qudt.org/schema/qudt/> .
@prefix qudt-unit: <http://qudt.org/vocab/unit/> .
@prefix sensordata: <https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#> .
@prefix sosa: <http://www.w3.org/ns/sosa/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sensordata:Observation0 a sosa:Observation ;
    sosa:hasFeatureOfInterest sensordata:ABCE_atrium ;
    sosa:hasResult sensordata:Observation0_Result ;
    sosa:madeBySensor sensordata:Sensor1_Hobo_U12 ;
    sosa:observedProperty sensordata:AirTemperature ;
    sosa:resultTime "2013-10-02T06:00:00"^^xsd:dateTime ;
    sosa:usedProdecure sensordata:Procedure1 .

sensordata:Observation1 a sosa:Observation ;
    sosa:hasFeatureOfInterest sensordata:ABCE_atrium ;
    sosa:hasResult sensordata:Observation1_Result ;
    sosa:madeBySensor sensordata:Sensor1_Hobo_U12 ;
    sosa:observedProperty sensordata:AirRelativeHumidity ;
    sosa:resultTime "2013-10-02T06:00:00"^^xsd:dateT

In [11]:
query="""
SELECT ?s ?p ?o
WHERE {
    ?s ?p ?o. 
    ?s a sosa:Result
}
LIMIT 10
"""
result=g.query(query)
#for row in result:
#    print(row)
g1=rdflib.Graph()
g1.bind('sensordata',rdflib.URIRef('https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#'))
g1.bind('sosa',rdflib.URIRef('http://www.w3.org/ns/sosa/'))
g1.bind('qudt-unit',rdflib.URIRef('http://qudt.org/vocab/unit/'))
g1.bind('qudt',rdflib.URIRef('http://qudt.org/schema/qudt/'))
for row in result: g1.add(row)
print(g1.serialize(format='ttl'))

@prefix qudt: <http://qudt.org/schema/qudt/> .
@prefix qudt-unit: <http://qudt.org/vocab/unit/> .
@prefix sensordata: <https://github.com/building-energy/ABCE_Open_Data_Project/tree/main/datasets/Hobo_U12_sensor_data_v2#> .
@prefix sosa: <http://www.w3.org/ns/sosa/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sensordata:Observation0_Result a sosa:Result ;
    qudt:unit qudt-unit:DEG_C ;
    qudt:value 1.9865e+01 .

sensordata:Observation1_Result a sosa:Result ;
    qudt:unit qudt-unit:PERCENT ;
    qudt:value 5.9728e+01 .

sensordata:Observation2_Result a sosa:Result ;
    qudt:unit qudt-unit:LUX ;
    qudt:value 1.18e+01 .

sensordata:Observation3_Result qudt:value 1.9817e+01 .


