In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# !pip install wurlitzer

In [3]:
%load_ext wurlitzer

In [4]:
import pandas as pd
import numpy as np
import logging
logging.basicConfig(level = logging.INFO)

In [5]:
import pyfdb
import findlibs
import yaml
from pathlib import Path
import os
import pandas as pd
import pyodc
import shutil

fdb5_path = Path(findlibs.find("fdb5"))
metkit_path = Path(findlibs.find("metkit"))
print(f"Using fdb5 shared library from {fdb5_path}")
print(f"Using metkit shared library from {metkit_path}")

marsrequest = (metkit_path.parents[1] / "share/metkit/odb/marsrequest.yaml").resolve()
languages = (metkit_path.parents[1] / "share/metkit/language.yaml").resolve() 

print(f"\nContents of {marsrequest}:\n", marsrequest.open().read())

fields = " ".join(yaml.safe_load(languages.open().read())["_field"].keys())
print(f"Fields defined in {languages}:\n {fields}")

Using fdb5 shared library from /Users/math/micromamba/envs/ionbeam/lib/libfdb5.dylib
Using metkit shared library from /Users/math/micromamba/envs/ionbeam/lib/libmetkit.dylib

Contents of /Users/math/micromamba/envs/ionbeam/share/metkit/odb/marsrequest.yaml:
 ---
PLATFORM: platform
OBSERVATION_VARIABLE: observation_variable
Fields defined in /Users/math/micromamba/envs/ionbeam/share/metkit/language.yaml:
 observation_variable platform class type stream expver dataset model repres obsgroup reportype levtype levelist param date hdate offsetdate fcmonth fcperiod time offsettime step anoffset reference number quantile domain frequency direction diagnostic iteration channel ident instrument method origin system


In [6]:
from pathlib import Path
from ionbeam.core.config_parser import parse_config
from ionbeam.core.bases import *
from IPython.display import display

config_file = Path("~/git/IonBeam-Deployment/config/ionbeam").expanduser()
config, actions = parse_config(config_file,
                    config_path = "./",
                    data_path = "../data/",
                    offline = True,
                    environment  = "local",
                   sources = ["smart_citizen_kit"]
                    )



In [7]:
from uuid import UUID

id2action = {a.id : a for a in actions}
sources = [a for a in actions if isinstance(a, Source)]
processors = [a for a in actions if not isinstance(a, Source)]

chains = [[s,] for s in sources]
for a in processors:
    if not isinstance(a.match, UUID): 
        chains.append([str(a.match), a,])
        continue
    for c in chains:
        matches = a.match.int == c[-1].id.int
        if matches: c.append(a)
            

for c in chains:
    print(" --> ".join(str(a) for a in c))

SmartCitizenKitSource --> CSVParser --> Splitter --> NewTimeAggregator --> ODCEncoder
[Match(state = 'odc_encoded')] --> RESTWriter()


In [8]:
sources, stateless_actions, aggregators = [], [], []
for action in actions:
    if isinstance(action, Source):
        sources.append(action)
    elif isinstance(action, Aggregator):
        aggregators.append(action)
    else:
        stateless_actions.append(action)

print("Sources")
for i, a in enumerate(sources):
    display(a)

print("Aggregators")
for i, a in enumerate(aggregators):
    display(a)

print("Actions")
for i, a in enumerate(stateless_actions):
    display(a)

Sources


0,1
id,7147bfbc-1b18-4931-b938-29547471c9ec
mappings,"[InputColumn(name='time', key='time', type=None, unit=None, discard=False, canonical_variable=Non..."
finish_after,
copy_metadata_to_columns,"[station_name, station_id, lat, lon, created_at, city, country, author]"
cache_version,3
use_cache,True
cache_directory,/Users/math/git/IonBeam-Deployment/data/inputs/smart_citizen_kit

name,value
source_action_id,7147bfbc-1b18-4931-b938-29547471c9ec
state,raw
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='sensor_name', unit=None, desc='The name of the sensor that made the obse..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2024, 8, 2, 9, 42, 49, 950026, tzinfo=da..."


Aggregators


0,1
id,7de47ea3-39c5-416f-8d06-b3726d8c99f8
match,7482fca9-3b8b-464c-8651-9bdf440a7608
granularity_hours,1
time_direction,forwards
min_emit_after_hours,96
emit_after_multiplier,5
time_chunks,{}
time_frontier,

name,value
source_action_id,7de47ea3-39c5-416f-8d06-b3726d8c99f8
state,time_aggregated
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='sensor_name', unit=None, desc='The name of the sensor that made the obse..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2024, 8, 2, 9, 42, 49, 950026, tzinfo=da..."


Actions


0,1
id,8293a7bc-7aa8-4b2e-a30e-329421abc316
match,7147bfbc-1b18-4931-b938-29547471c9ec
mappings,"[InputColumn(name='time', key='time', type=None, unit=None, discard=False, canonical_variable=Can..."
identifying_keys,[time]
metadata_keys,"[station_name, station_id, author, lat, lon, created_at, city, country]"
separator,","
custom_nans,

name,value
source_action_id,8293a7bc-7aa8-4b2e-a30e-329421abc316
state,parsed
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='sensor_name', unit=None, desc='The name of the sensor that made the obse..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2024, 8, 2, 9, 42, 49, 950026, tzinfo=da..."

name,key,type,unit,discard,canonical_variable
total_volatile_organic_compounds,TVOC,,ppb,False,"{'name': 'total_volatile_organic_compounds', 'desc': None, 'unit': 'ppb', 'CRS': None, 'WMO': Fal..."
total_volatile_organic_compounds,tvoc,,ppb,False,"{'name': 'total_volatile_organic_compounds', 'desc': None, 'unit': 'ppb', 'CRS': None, 'WMO': Fal..."
equivalent_carbon_dioxide,eco2,,ppm,False,"{'name': 'equivalent_carbon_dioxide', 'desc': None, 'unit': 'ppm', 'CRS': None, 'WMO': False, 'dt..."
equivalent_carbon_dioxide,eCO2,,ppm,False,"{'name': 'equivalent_carbon_dioxide', 'desc': None, 'unit': 'ppm', 'CRS': None, 'WMO': False, 'dt..."
ambient_light,Light,,lux,False,"{'name': 'ambient_light', 'desc': None, 'unit': 'lux', 'CRS': None, 'WMO': False, 'dtype': 'float..."
ambient_light,light,,lux,False,"{'name': 'ambient_light', 'desc': None, 'unit': 'lux', 'CRS': None, 'WMO': False, 'dtype': 'float..."
noise_dB,Noise Level,,dB,False,"{'name': 'noise_dB', 'desc': None, 'unit': 'dB', 'CRS': None, 'WMO': False, 'dtype': 'float64', '..."
noise_dBA,Noise Level,,dBA,False,"{'name': 'noise_dBA', 'desc': None, 'unit': 'dBA', 'CRS': None, 'WMO': False, 'dtype': 'float64',..."
noise_dBA,noise_dba,,dBA,False,"{'name': 'noise_dBA', 'desc': None, 'unit': 'dBA', 'CRS': None, 'WMO': False, 'dtype': 'float64',..."
air_pressure_near_surface,Barometric Pressure,,kPa,False,"{'name': 'air_pressure_near_surface', 'desc': None, 'unit': 'Pa', 'CRS': None, 'WMO': False, 'dty..."


0,1
id,7482fca9-3b8b-464c-8651-9bdf440a7608
match,8293a7bc-7aa8-4b2e-a30e-329421abc316
identifying_keys,[time]
metadata_keys,"[station_name, author, lat, lon, created_at, city, country]"

name,value
source_action_id,7482fca9-3b8b-464c-8651-9bdf440a7608
state,parsed
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='sensor_name', unit=None, desc='The name of the sensor that made the obse..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2024, 8, 2, 9, 42, 49, 950026, tzinfo=da..."


0,1
id,0880c8b7-b524-4a98-b95f-a7a9ef39f29c
match,7de47ea3-39c5-416f-8d06-b3726d8c99f8
output,outputs/{source}/odb/{observation_variable}/{observation_variable}_{time_slice.start_time}.odb
MARS_keys,"[MARS_Key(name='class', dtype=<DataType.STRING: 3>, fill_method='constant', value='rd', key=None,..."
one_file_per_granule,True
columns_to_metadata,[]
seconds,True
minutes,True

name,value
source_action_id,0880c8b7-b524-4a98-b95f-a7a9ef39f29c
state,odc_encoded
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='sensor_name', unit=None, desc='The name of the sensor that made the obse..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2024, 8, 2, 9, 42, 49, 950026, tzinfo=da..."


0,1
id,0ebfdbc3-dbed-4f67-acb8-af9bced0d8a5
match,[Match(state = 'odc_encoded')]

name,value
source_action_id,0ebfdbc3-dbed-4f67-acb8-af9bced0d8a5
state,written
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='sensor_name', unit=None, desc='The name of the sensor that made the obse..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2024, 8, 2, 9, 42, 49, 950026, tzinfo=da..."


In [9]:
online = True
if online:
    source = sources[0]
    source.globals.offline = !online
    source.use_cache = False
    incoming_message_stream = source.generate()
    message = next(incoming_message_stream)
    message
else:
    p = Path('/Users/math/git/IonBeam-Deployment/data/inputs/meteotracker/MeteoTracker_62ae154f1d8e11061d4474b2.csv')
    message = FileMessage(
        metadata=MetaData(state='raw',
            source='meteotracker',
            filepath= p,
            variables = list(pd.read_csv(p).columns),             
            ))
message

StopIteration: 

In [None]:
from ionbeam.core.bases import FinishMessage, FileMessage, MetaData
from ionbeam.aggregators import TimeAggregator

possible_actions = [a for a in actions if not isinstance(a, Source)]

message_history = []
while True:
    display(message)
    message_history.append(message)
    matching = [action for action in possible_actions if action.matches(message)]
    
    if not matching: 
        print("No more matches")
        break

    print("That matched with: ", [str(a) for a in matching])
    action = matching[0]

    # Special case for the TimeAggregator
    if action.__class__.__name__ == "TimeAggregator":
        list(action.process(message)) #need the list call here to pump the iterator to completion even if it doesn't return anything
        message = next(action.process(FinishMessage("We're done!")))
    else:
        message = next(action.process(message))
        

In [None]:
m = message_history[-1]
m.data

In [None]:

df = pd.DataFrame({"class" : ["rd", "rd"], "col1" : [1,2], "col2" : [1,2]})
pyodc.encode_odb(df, "test.odb")

with open("test.odb", "rb") as f:
    pyfdb.archive(f.read())