# Create metadata

This notebook creates a metadata.json file based on the EnergyPlus eplusout.csv file.

## Setup

In [1]:
import requests
import csv
import json

## Get csv file from figshare

In [2]:
csv_download_url='https://figshare.com/ndownloader/files/35934071?private_link=464885898d0041bfa8fd'
response=requests.get(csv_download_url)
csv_text=response.text
csv_text.split('\n')[0]

'Date/Time,Environment:Site Outdoor Air Drybulb Temperature [C](Hourly),Environment:Site Total Sky Cover [](Hourly),Environment:Site Opaque Sky Cover [](Hourly),Environment:Site Daylight Saving Time Status [](Daily),Environment:Site Day Type Index [](Daily),ZONE ONE:Zone Total Internal Latent Gain Energy [J](Hourly),TEST 352A:Other Equipment Total Heating Energy [J](Monthly),TEST 352 MINUS:Other Equipment Total Heating Energy [J](Monthly),ZONE ONE:Zone Other Equipment Total Heating Energy [J](Monthly),ZONE ONE:Zone Mean Radiant Temperature [C](Hourly),ZN001:WALL001:Surface Inside Face Temperature [C](Daily),ZN001:WALL001:Surface Outside Face Temperature [C](Daily),ZN001:WALL001:Surface Inside Face Convection Heat Transfer Coefficient [W/m2-K](Daily),ZN001:WALL001:Surface Outside Face Convection Heat Transfer Coefficient [W/m2-K](Daily),ZN001:WALL002:Surface Inside Face Temperature [C](Daily),ZN001:WALL002:Surface Outside Face Temperature [C](Daily),ZN001:WALL002:Surface Inside Face Con

## Get header row

In [3]:
csv_reader=csv.reader(csv_text.splitlines())
header_row=next(csv_reader)
header_row=[x.strip() for x in header_row]
header_row

['Date/Time',
 'Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)',
 'Environment:Site Total Sky Cover [](Hourly)',
 'Environment:Site Opaque Sky Cover [](Hourly)',
 'Environment:Site Daylight Saving Time Status [](Daily)',
 'Environment:Site Day Type Index [](Daily)',
 'ZONE ONE:Zone Total Internal Latent Gain Energy [J](Hourly)',
 'TEST 352A:Other Equipment Total Heating Energy [J](Monthly)',
 'TEST 352 MINUS:Other Equipment Total Heating Energy [J](Monthly)',
 'ZONE ONE:Zone Other Equipment Total Heating Energy [J](Monthly)',
 'ZONE ONE:Zone Mean Radiant Temperature [C](Hourly)',
 'ZN001:WALL001:Surface Inside Face Temperature [C](Daily)',
 'ZN001:WALL001:Surface Outside Face Temperature [C](Daily)',
 'ZN001:WALL001:Surface Inside Face Convection Heat Transfer Coefficient [W/m2-K](Daily)',
 'ZN001:WALL001:Surface Outside Face Convection Heat Transfer Coefficient [W/m2-K](Daily)',
 'ZN001:WALL002:Surface Inside Face Temperature [C](Daily)',
 'ZN001:WALL002:Surface Outside 

## Function to create column descriptions

In [4]:
base_url=r"https://bigladdersoftware.com/epx/docs/9-6/input-output-reference/"
reference_dict={
    "Site Outdoor Air Drybulb Temperature": \
        "group-location-climate-weather-file-access.html#site-outdoor-air-drybulb-temperature-c",
    "Zone Mean Radiant Temperature": \
        "group-thermal-zone-description-geometry.html#zone-mean-radiant-temperature-c-1",
    "Site Total Sky Cover": \
        "group-location-climate-weather-file-access.html#site-total-sky-cover",
    "Site Opaque Sky Cover": \
        "group-location-climate-weather-file-access.html#site-opaque-sky-cover",
    "Zone Mean Air Temperature": \
        "group-thermal-zone-description-geometry.html#zone-mean-air-temperature-c-1",
    "Zone Air Heat Balance Surface Convection Rate": \
        "group-thermal-zone-description-geometry.html#zone-air-heat-balance-surface-convection-rate-w",
    "Zone Air Heat Balance Air Energy Storage Rate": \
        "group-thermal-zone-description-geometry.html#zone-air-heat-balance-air-energy-storage-rate-w",
    "Site Daylight Saving Time Status": \
        "group-location-climate-weather-file-access.html#site-daylight-saving-time-status",
    "Site Day Type Index": \
        "group-location-climate-weather-file-access.html#site-day-type-index",
    "Zone Total Internal Latent Gain Energy": \
        "group-thermal-zone-description-geometry.html#zone-total-internal-latent-gain-energy-j",
    "Other Equipment Total Heating Energy": \
        "group-internal-gains-people-lights-other.html#outputs-5-004",
    "Surface Inside Face Temperature": \
        "group-thermal-zone-description-geometry.html#surface-inside-face-temperature-c",
    "Surface Outside Face Temperature": \
        "group-thermal-zone-description-geometry.html#surface-outside-face-temperature-c",
    "Surface Inside Face Convection Heat Transfer Coefficient": \
        "group-thermal-zone-description-geometry.html#surface-inside-face-convection-heat-transfer-coefficient-wm2-k",
    "Surface Outside Face Convection Heat Transfer Coefficient": \
        "group-thermal-zone-description-geometry.html#surface-outside-face-convection-heat-transfer-coefficient-wm2-k"
}

qudt_dict={
    #"": "http://qudt.org/vocab/unit/FRACTION",
    "C": "http://qudt.org/vocab/unit/DEG_C",
    "W": "http://qudt.org/vocab/unit/W-PER-M",
    "J": "http://qudt.org/vocab/unit/J",
    "W/m2-K": "http://qudt.org/vocab/unit/W-PER-M2-K"
}


time_interval_dict={
    "Hourly": "H1",
    "Daily": "D1",
    "Monthly": "M1"    
}


def create_column_description(header):
    ""
    d={
        "@type": "Column",
        "titles": header,
    }
    
    variable=header.split('[')[0].strip()
    
    try:
        units=header.split('[')[1].split(']')[0].strip()
    except IndexError:
        units=None
        
    try:
        time_interval=header.split('(')[1].split(')')[0].strip()
    except IndexError:
        time_interval=None
    
    # dc:description
    d["dc:description"]= header
    
    # schema:variableMeasured
    d['schema:variableMeasured']=variable
    
    # schema:unitText
    if units:
        d['schema:unitText']=units
        
    # schema:duration
    if time_interval:
        d['schema:duration']=time_interval_dict[time_interval]
    
    # dc:reference
    reference_url=None
    for k,v in reference_dict.items():
        if variable.endswith(k):
            reference_url=v
    if reference_url:
        d['dc:references']={"@id":f"{base_url}{reference_url}"}
    else:
        if not variable is None:
            print(variable)
        
    # http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure
    qudt_url=qudt_dict.get(units,None)
    if qudt_url:
        d['http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure']={"@id":f"{qudt_url}"}
    else:
        print(units)
    
    # datatype
    if variable=='Date/Time':
        d['datatype']='string'
    else:
        d['datatype']='number'
        
    # other comments
    if variable=='Date/Time':
        d['rdfs:comment']='The Date/Time column contains a non-standard date and time format which does match any of the CSVW data format options.'
        
    return d

# test below
header='Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)'
create_column_description(header)

{'@type': 'Column',
 'titles': 'Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)',
 'dc:description': 'Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)',
 'schema:variableMeasured': 'Environment:Site Outdoor Air Drybulb Temperature',
 'schema:unitText': 'C',
 'schema:duration': 'H1',
 'dc:references': {'@id': 'https://bigladdersoftware.com/epx/docs/9-6/input-output-reference/group-location-climate-weather-file-access.html#site-outdoor-air-drybulb-temperature-c'},
 'http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure': {'@id': 'http://qudt.org/vocab/unit/DEG_C'},
 'datatype': 'number'}

## Create & save metadata dict

In [5]:
d={
    "@context": "http://www.w3.org/ns/csvw",
    "@type": "Table",
    "url": "eplusout.csv",
    "dc:title": "EnergyPlus simulation test",
    "dc:description": "An EnergyPlus simulation of an office room in Sir Frank Gibb building, Loughborough University",
    "dc:creator": "ABCE Open Research Team",   
    "tableSchema": {
        "@type": "Schema",
        "columns": [create_column_description(header) for header in header_row]
        }
}
with open('eplusout.csv-metadata.json','w') as f:
    json.dump(d, f, indent=4)
d

Date/Time
None






{'@context': 'http://www.w3.org/ns/csvw',
 '@type': 'Table',
 'url': 'eplusout.csv',
 'dc:title': 'EnergyPlus simulation test',
 'dc:description': 'An EnergyPlus simulation of an office room in Sir Frank Gibb building, Loughborough University',
 'dc:creator': 'ABCE Open Research Team',
 'tableSchema': {'@type': 'Schema',
  'columns': [{'@type': 'Column',
    'titles': 'Date/Time',
    'dc:description': 'Date/Time',
    'schema:variableMeasured': 'Date/Time',
    'datatype': 'string',
    'rdfs:comment': 'The Date/Time column contains a non-standard date and time format which does match any of the CSVW data format options.'},
   {'@type': 'Column',
    'titles': 'Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)',
    'dc:description': 'Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)',
    'schema:variableMeasured': 'Environment:Site Outdoor Air Drybulb Temperature',
    'schema:unitText': 'C',
    'schema:duration': 'H1',
    'dc:references': {'@id': 'https://bi