In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# the project's source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
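# Uncomment on first use to install the extension loaded in the next cell
# (%pip installs into the environment of the running kernel):
# %pip install wurlitzer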

In [3]:
# Load the wurlitzer IPython extension.
# NOTE(review): presumably so that output written by the C/C++ libraries
# used below (fdb5, metkit) shows up in the notebook — confirm.
%load_ext wurlitzer

In [4]:
import pandas as pd
import numpy as np
import logging
# Configure the root logger so that INFO-level records and above are emitted.
logging.basicConfig(level = logging.INFO)

In [5]:
import pyfdb
import findlibs
import yaml
from pathlib import Path
import os
import pandas as pd
import pyodc
import shutil

# Locate the shared libraries that pyfdb will load and report which ones are in use.
fdb5_path = Path(findlibs.find("fdb5"))
metkit_path = Path(findlibs.find("metkit"))
print(f"Using fdb5 shared library from {fdb5_path}")
print(f"Using metkit shared library from {metkit_path}")

# The metkit data files are looked up two directory levels above the library
# file, under share/metkit.
marsrequest = (metkit_path.parents[1] / "share/metkit/odb/marsrequest.yaml").resolve()
languages = (metkit_path.parents[1] / "share/metkit/language.yaml").resolve()

# read_text() opens, reads and closes the file in one call, so no file
# handle is left open for the lifetime of the kernel.
print(f"\nContents of {marsrequest}:\n", marsrequest.read_text())

# The keys of the "_field" mapping of the language file, space separated.
fields = " ".join(yaml.safe_load(languages.read_text())["_field"].keys())
print(f"Fields defined in {languages}:\n {fields}")

Using fdb5 shared library from /Users/math/micromamba/envs/ionbeam/lib/libfdb5.dylib
Using metkit shared library from /Users/math/micromamba/envs/ionbeam/lib/libmetkit.dylib

Contents of /Users/math/micromamba/envs/ionbeam/share/metkit/odb/marsrequest.yaml:
 ---
PLATFORM: platform
OBSERVATION_VARIABLE: observation_variable
Fields defined in /Users/math/micromamba/envs/ionbeam/share/metkit/language.yaml:
 observation_variable platform class type stream expver dataset model repres obsgroup reportype levtype levelist param date hdate offsetdate fcmonth fcperiod time offsettime step anoffset reference number quantile domain frequency direction diagnostic iteration channel ident instrument method origin system


In [6]:
from pathlib import Path
from ionbeam.core.config_parser import parse_config
from ionbeam.core.bases import *
from IPython.display import display

# Directory holding the ionbeam configuration, relative to the user's home.
config_file = Path("~/git/IonBeam-Deployment/config/ionbeam").expanduser()

# Parse the configuration for a single source; `config` is reused below for
# its globals and `actions` is the list of configured pipeline actions.
config, actions = parse_config(
    config_file,
    config_path="./",
    data_path="../data/",
    offline=True,
    environment="local",
    sources=["smart_citizen_kit"],
)



In [7]:
from uuid import UUID

# Index the actions by id and split them into sources and everything else.
id2action = {action.id: action for action in actions}
sources = [action for action in actions if isinstance(action, Source)]
processors = [action for action in actions if not isinstance(action, Source)]

# Every source starts its own chain; processors are attached afterwards.
chains = [[s] for s in sources]
for a in processors:
    if isinstance(a.match, UUID):
        # A UUID match names the action whose output this one consumes:
        # append to every chain that currently ends in that action.
        for c in chains:
            if a.match.int == c[-1].id.int:
                c.append(a)
    else:
        # Any other kind of match starts a new chain, labelled with the
        # textual form of the match.
        chains.append([str(a.match), a])

for c in chains:
    print(" --> ".join(map(str, c)))

SmartCitizenKitSource --> CSVParser --> GenerateMetaData --> TimeAggregator --> ODCEncoder
[Match(state = 'odc_encoded')] --> RESTWriter()


In [8]:
# Partition the actions by role: sources, aggregators, and everything else.
sources, stateless_actions, aggregators = [], [], []
for action in actions:
    if isinstance(action, Source):
        sources.append(action)
    elif isinstance(action, Aggregator):
        aggregators.append(action)
    else:
        stateless_actions.append(action)

# Show each group under its heading using the rich notebook representation.
# One loop over (heading, group) pairs replaces three copies of the same
# print/display loop.
for heading, group in (
    ("Sources", sources),
    ("Aggregators", aggregators),
    ("Actions", stateless_actions),
):
    print(heading)
    for a in group:
        display(a)

Sources


0,1
id,e3cfa3d1-c073-4f03-9f94-4e14bb190208
finish_after,
copy_metadata_to_columns,"[InputColumn(name='sensor_name', key='sensor.name', type=None, unit=None, discard=False, canonica..."
cache_version,1
use_cache,True
cache_directory,/Users/math/git/IonBeam-Deployment/data/inputs/smart_citizen_kit
value_columns,"[InputColumn(name='total_volatile_organic_compounds', key='TVOC', type=None, unit='ppb', discard=..."

name,value
source_action_id,e3cfa3d1-c073-4f03-9f94-4e14bb190208
state,raw
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


Aggregators


0,1
id,bec56295-86b5-4b94-9294-1a62afe44dce
match,5796e58b-81a7-4793-8d26-db5dda7dfc49
granularity,1h
time_direction,forwards
emit_after_hours,200

name,value
source_action_id,bec56295-86b5-4b94-9294-1a62afe44dce
state,time_aggregated
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


Actions


0,1
id,98cef1b2-a331-42b3-9f31-dcfdc2acf4e2
match,e3cfa3d1-c073-4f03-9f94-4e14bb190208
identifying_columns,"[InputColumn(name='time', key='time', type='datetime', unit=None, discard=False, canonical_variab..."
value_columns,"[InputColumn(name='total_volatile_organic_compounds', key='TVOC', type=None, unit='ppb', discard=..."
metadata_columns,"[InputColumn(name='author', key='device.name', type=None, unit=None, discard=False, canonical_var..."
separator,","
custom_nans,

name,value
source_action_id,98cef1b2-a331-42b3-9f31-dcfdc2acf4e2
state,parsed
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."

name,key,type,unit,discard,canonical_variable
total_volatile_organic_compounds,TVOC,,ppb,False,"{'name': 'total_volatile_organic_compounds', 'desc': None, 'unit': 'ppb', 'CRS': None, 'WMO': Fal..."
equivalent_carbon_dioxide,eCO2,,ppm,False,"{'name': 'equivalent_carbon_dioxide', 'desc': None, 'unit': 'ppm', 'CRS': None, 'WMO': False, 'dt..."
ambient_light,Light,,lux,False,"{'name': 'ambient_light', 'desc': None, 'unit': 'lux', 'CRS': None, 'WMO': False, 'dtype': 'float..."
noise_dB,Noise Level,,dB,False,"{'name': 'noise_dB', 'desc': None, 'unit': 'dB', 'CRS': None, 'WMO': False, 'dtype': 'float64', '..."
noise_dBA,Noise Level,,dBA,False,"{'name': 'noise_dBA', 'desc': None, 'unit': 'dBA', 'CRS': None, 'WMO': False, 'dtype': 'float64',..."
air_pressure_near_surface,Barometric Pressure,,kPa,False,"{'name': 'air_pressure_near_surface', 'desc': None, 'unit': 'Pa', 'CRS': None, 'WMO': False, 'dty..."
nitrogen_oxide,NO2,,ppb,False,"{'name': 'nitrogen_oxide', 'desc': None, 'unit': 'ppm', 'CRS': None, 'WMO': False, 'dtype': 'floa..."
particulate_matter_1,PM 1,,µg/m3,False,"{'name': 'particulate_matter_1', 'desc': None, 'unit': 'μg/m3', 'CRS': None, 'WMO': False, 'dtype..."
particulate_matter_1,PM 1,,ug/m3,False,"{'name': 'particulate_matter_1', 'desc': None, 'unit': 'μg/m3', 'CRS': None, 'WMO': False, 'dtype..."
particulate_matter_10,PM 10,,µg/m3,False,"{'name': 'particulate_matter_10', 'desc': None, 'unit': 'μg/m3', 'CRS': None, 'WMO': False, 'dtyp..."


0,1
id,5796e58b-81a7-4793-8d26-db5dda7dfc49
match,98cef1b2-a331-42b3-9f31-dcfdc2acf4e2

name,value
source_action_id,5796e58b-81a7-4793-8d26-db5dda7dfc49
state,parsed
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


0,1
id,887b49b7-5bc2-44f0-92fe-5aa8759bf08c
match,bec56295-86b5-4b94-9294-1a62afe44dce
output,outputs/{source}/odb/{observation_variable}/{observation_variable}_{time_slice.start_time}.odb
MARS_keys,"[MARS_Key(name='class', dtype=<DataType.STRING: 3>, fill_method='constant', value='rd', key=None,..."
one_file_per_granule,True
columns_to_metadata,[]
seconds,True
minutes,True

name,value
source_action_id,887b49b7-5bc2-44f0-92fe-5aa8759bf08c
state,odc_encoded
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


0,1
id,b4ba8f19-9878-4820-8e79-7c3109ccfc3a
match,[Match(state = 'odc_encoded')]

name,value
source_action_id,b4ba8f19-9878-4820-8e79-7c3109ccfc3a
state,written
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


In [9]:
# Take the source from the filtered `sources` list rather than relying on it
# happening to be the first entry of `actions`.
sck_source = sources[0]
sck_source.value_columns

[InputColumn(name='total_volatile_organic_compounds', key='TVOC', type=None, unit='ppb', discard=False, canonical_variable=None),
 InputColumn(name='equivalent_carbon_dioxide', key='eCO2', type=None, unit='ppm', discard=False, canonical_variable=None),
 InputColumn(name='ambient_light', key='Light', type=None, unit='%', discard=True, canonical_variable=None),
 InputColumn(name='ambient_light', key='Light', type=None, unit='lux', discard=False, canonical_variable=None),
 InputColumn(name='noise_dB', key='Noise Level', type=None, unit='dB', discard=False, canonical_variable=None),
 InputColumn(name='noise_dBA', key='Noise Level', type=None, unit='dBA', discard=False, canonical_variable=None),
 InputColumn(name='air_pressure_near_surface', key='Barometric Pressure', type=None, unit='kPa', discard=False, canonical_variable=None),
 InputColumn(name='nitrogen_oxide', key='no2', type=None, unit='kOhm', discard=True, canonical_variable=None),
 InputColumn(name='nitrogen_oxide', key='NO2', type

In [10]:
import json
# Get the device records from the source and pretty-print the first one as JSON.
devices = sck_source.get_ICHANGE_devices()
print(json.dumps(devices[0], indent = 4))

{
    "id": 28,
    "uuid": "856e9670-03d1-481e-b9c5-6bf16474d993",
    "name": "Manyi_08230",
    "description": "Desde el 3/5/13: en exterior.\nEmplazamiento: Zona arbolada en limite de Parque Natural Sant Lloren\u00e7.\nSituado a unos 60m de la calle con trafico mas pr\u00f3xima.\n\nColocaci\u00f3n sensor: desde ahora; 2/4/14 en caja kit.\nSituado bajo placa solar.\n\nAlimentaci\u00f3n del sensor: Desde el 19/05/2013, conectado a placa solar de 12V-610mA Ref: ET-M53610 encarada a 182 Sur, con unos 45grados de inclinaci\u00f3n. Desde 2/4/14, sin placa solar, alimentado permanente con alimentador.",
    "state": "has_published",
    "system_tags": [
        "offline",
        "outdoor"
    ],
    "user_tags": [
        "Barcelona"
    ],
    "last_reading_at": "2014-04-04T18:30:16Z",
    "created_at": "2013-04-24T22:17:56Z",
    "updated_at": "2024-06-27T17:03:14Z",
    "notify": {
        "stopped_publishing": false,
        "low_battery": false
    },
    "device_token": "[FILTERED]

In [11]:
# Inspect the first sensor entry of the first device.
devices[0]["data"]["sensors"][0]

{'id': 10,
 'ancestry': None,
 'name': 'Battery SCK',
 'description': 'Custom Circuit',
 'unit': '%',
 'created_at': '2015-02-02T18:18:00Z',
 'updated_at': '2020-12-11T16:12:40Z',
 'uuid': 'c9ff2784-53a7-4a84-b0fc-90ecc7e313f9',
 'default_key': 'bat',
 'datasheet': None,
 'unit_definition': None,
 'measurement': {'id': 7,
  'name': 'battery',
  'description': 'The SCK remaining battery level in percentage.',
  'unit': None,
  'uuid': 'c5964926-c2d2-4714-98b5-18f84c6f95c1',
  'definition': None},
 'value': 71.7,
 'prev_value': 71.7,
 'last_reading_at': '2014-04-04T18:30:16Z',
 'tags': []}

In [12]:
from ionbeam.metadata.db import init_db

# Initialise the metadata database using the globals of the parsed config.
# NOTE(review): what exactly gets created or populated is defined in ionbeam.metadata.db — confirm there.
init_db(config.globals)

INFO:ionbeam.metadata.db:Adding 'Sensor.Community' to Authors table
INFO:ionbeam.metadata.db:Adding 'Meteotracker' to Authors table
INFO:ionbeam.metadata.db:Adding 'Acronet' to Authors table
INFO:ionbeam.metadata.db:Adding 'SmartCitizenKit' to Authors table


In [13]:
from ionbeam.metadata import db
from shapely.geometry import Point

from ionbeam.sources.smart_citizen_kit.metadata import construct_sck_metadata

# Try the metadata construction on a single device first.
device = devices[0]
construct_sck_metadata(sck_source, device)

In [14]:
from sqlalchemy.orm import Session
device = devices[0]
# Named `device_id` rather than `id` so that the builtin id() is not
# shadowed for every later cell in the kernel session.
device_id = device["id"]


with Session(sck_source.globals.sql_engine) as session:
    # Load every station and look at the first one.
    stations = session.query(db.Station).all()
    station = stations[0]
    print("Retrieved station: ", station)
    # Serialise while the session is still open; `j` is printed after it closes.
    j = station.as_json()
    print(station)
    # Peek at the second sensor of the station and its properties.
    print(station.sensors[1])
    print(station.sensors[1].properties)

print(json.dumps(j, indent = 4))

Retrieved station:  Station(id=9fe29acc-9b47-460e-a958-b23695e2e3af, external_id='28')
Station(id=9fe29acc-9b47-460e-a958-b23695e2e3af, external_id='28')
Sensor(id=2, name='POM-3044P-R')
[Property(key='noise_dB', name='noise_dB', unit='dB', description=None)]
{
    "name": "Manyi_08230",
    "description": "Desde el 3/5/13: en exterior.\nEmplazamiento: Zona arbolada en limite de Parque Natural Sant Lloren\u00e7.\nSituado a unos 60m de la calle con trafico mas pr\u00f3xima.\n\nColocaci\u00f3n sensor: desde ahora; 2/4/14 en caja kit.\nSituado bajo placa solar.\n\nAlimentaci\u00f3n del sensor: Desde el 19/05/2013, conectado a placa solar de 12V-610mA Ref: ET-M53610 encarada a 182 Sur, con unos 45grados de inclinaci\u00f3n. Desde 2/4/14, sin placa solar, alimentado permanente con alimentador.",
    "platform": "Smart Citizen Kit",
    "external_id": "28",
    "location": [
        2.01106,
        41.61441
    ],
    "geojson": {
        "type": "Point",
        "coordinates": [
          

In [15]:

# Repeat the metadata construction for every device.
for device in devices:
    construct_sck_metadata(sck_source, device)



In [16]:
# Obtain one message to feed through the pipeline: either pulled live from
# the first source, or built from a local CSV file.
online = True
if online:
    source = sources[0]
    # Boolean negation must be spelled `not`. `= !online` is IPython's
    # shell-capture syntax (it runs a shell command called `online` and
    # assigns its output) and is a SyntaxError in plain Python.
    source.globals.offline = not online
    source.use_cache = False
    incoming_message_stream = source.generate()
    message = next(incoming_message_stream)
else:
    # NOTE(review): machine-specific absolute path — consider deriving it from the configured data path.
    p = Path('/Users/math/git/IonBeam-Deployment/data/inputs/meteotracker/MeteoTracker_62ae154f1d8e11061d4474b2.csv')
    message = FileMessage(
        metadata=MetaData(state='raw',
            source='meteotracker',
            filepath= p,
            variables = list(pd.read_csv(p).columns),
            ))
# Last expression of the cell: display whichever message was obtained.
message

name,value
source_action_id,e3cfa3d1-c073-4f03-9f94-4e14bb190208
state,raw
mars_request,{}
unstructured,"{'device_id': 15496, 'sensor_key': 'tvoc', 'sensor_id': 113, 'component_id': 72773, 'rollup': '1s..."

time,tvoc,sensor_name
2024-01-28T09:17:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:16:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:15:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:14:40Z,542,AMS CCS811 - TVOC
2024-01-28T09:13:40Z,542,AMS CCS811 - TVOC
2024-01-28T09:12:40Z,542,AMS CCS811 - TVOC
2024-01-28T09:11:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:10:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:09:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:08:40Z,547,AMS CCS811 - TVOC


In [17]:
from ionbeam.core.bases import FinishMessage, FileMessage, MetaData
from ionbeam.aggregators import TimeAggregator

# Every non-source action is a candidate for handling a message.
possible_actions = [candidate for candidate in actions if not isinstance(candidate, Source)]

# Push `message` through the pipeline by hand: display it, record it, find
# the actions that match it, apply the first one, and repeat with the result
# until nothing matches any more.
message_history = []
while True:
    display(message)
    message_history.append(message)

    matching = [candidate for candidate in possible_actions if candidate.matches(message)]
    if matching:
        print("That matched with: ", [str(candidate) for candidate in matching])
    else:
        print("No more matches")
        break

    # Only the first matching action is applied.
    action = matching[0]
    if type(action).__name__ != "TimeAggregator":
        message = next(action.process(message))
        continue

    # Special case for the TimeAggregator: feed it the message (the list()
    # call drains the iterator to completion even if it yields nothing),
    # then send a FinishMessage and take the first thing it emits.
    list(action.process(message))
    message = next(action.process(FinishMessage("We're done!")))
        

name,value
source_action_id,e3cfa3d1-c073-4f03-9f94-4e14bb190208
state,raw
mars_request,{}
unstructured,"{'device_id': 15496, 'sensor_key': 'tvoc', 'sensor_id': 113, 'component_id': 72773, 'rollup': '1s..."

time,tvoc,sensor_name
2024-01-28T09:17:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:16:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:15:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:14:40Z,542,AMS CCS811 - TVOC
2024-01-28T09:13:40Z,542,AMS CCS811 - TVOC
2024-01-28T09:12:40Z,542,AMS CCS811 - TVOC
2024-01-28T09:11:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:10:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:09:40Z,547,AMS CCS811 - TVOC
2024-01-28T09:08:40Z,547,AMS CCS811 - TVOC


That matched with:  ['CSVParser']


AssertionError: 

In [None]:
# Look at the data carried by the last message recorded by the processing loop.
m = message_history[-1]
m.data

In [None]:

# Encode a tiny two-row frame as an ODB file, then hand the file's raw bytes
# to the FDB for archiving.
# NOTE(review): no flush is issued after archive() — confirm whether one is needed for the data to be persisted.
df = pd.DataFrame({
    "class": ["rd", "rd"],
    "col1": [1, 2],
    "col2": [1, 2],
})
pyodc.encode_odb(df, "test.odb")

pyfdb.archive(Path("test.odb").read_bytes())

In [None]:
import unicodedata as u

# Sample characters to inspect: punctuation, digits, ASCII letters, and a few
# symbols and Greek letters. The backslash is written as "\\" because "\]"
# is not a valid escape sequence; the resulting string is unchanged.
cs = "¬!\"#£$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~°Ωαβγδθλμπωϕ£"

def unicode_info(c):
    """Print a short Unicode report for `c` if it is not in NFKD normal form.

    The report lists the character, its UTF-8 bytes, its Unicode name and
    category, its decomposition mapping (when it has one) and the name(s) of
    the code point(s) it normalizes to under NFKD. Characters that are
    already NFKD-normalized produce no output.

    Args:
        c: a single-character string.

    Returns:
        None; the report is written to stdout.
    """
    d = dict(
        character = c,
        utf8 = c.encode("utf_8"),
        # utf16 = c.encode("utf_16"),
        name = u.name(c, "NO NAME"),
        category = u.category(c),
    )
    decomposition = u.decomposition(c)
    if decomposition: d["decomposition"] = decomposition

    # Normalize once; a character is in NFKD form exactly when normalizing
    # leaves it unchanged.
    normalized = u.normalize('NFKD', c)
    if normalized != c:
        # NFKD can expand one character into several code points (e.g. a
        # base letter plus a combining mark). u.name() accepts only a single
        # character, so each resulting code point is named separately.
        d["normalized_name"] = " + ".join(u.name(part, "NO NAME") for part in normalized)
        print("\n".join(f"{k}: {v}" for k, v in d.items()) + "\n")

# Run the report over every sample character.
for c in cs: unicode_info(c)

In [None]:
# Show the character for code point U+1F600.
chr(0x1F600)

In [None]:
# Concatenate the characters for code points 0..99999 and write them to
# test.txt as UTF-8.
s = "".join(map(chr, range(100000)))
with open("test.txt", mode="wb") as f:
    # errors='ignore' drops any character that cannot be encoded as UTF-8
    # (the lone surrogate code points) instead of raising.
    f.write(s.encode("utf-8", errors='ignore'))

In [20]:
import unicodedata as u
# Look up the Unicode name of a single character.
u.name("·")

'MIDDLE DOT'