In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# library source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
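# One-off setup: uncomment to install wurlitzer (loaded by the next cell) into this kernel's environment.
# %pip install wurlitzer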

In [3]:
# wurlitzer forwards C-level stdout/stderr (e.g. output printed by shared
# libraries) into the notebook's cell output.
%load_ext wurlitzer

In [4]:
import pandas as pd
import numpy as np
import logging
# Configure the root logger so that records of level INFO and above are shown.
logging.basicConfig(level=logging.INFO)

In [5]:
import pyfdb
import findlibs
import yaml
from pathlib import Path
import os
import pandas as pd
import pyodc
import shutil

# Locate the fdb5 and metkit shared libraries that are visible to this environment.
fdb5_path = Path(findlibs.find("fdb5"))
metkit_path = Path(findlibs.find("metkit"))
print(f"Using fdb5 shared library from {fdb5_path}")
print(f"Using metkit shared library from {metkit_path}")

# metkit's YAML files are looked up under <prefix>/share/metkit, where <prefix>
# is two levels above the library file itself (<prefix>/lib/libmetkit.*).
marsrequest = (metkit_path.parents[1] / "share/metkit/odb/marsrequest.yaml").resolve()
languages = (metkit_path.parents[1] / "share/metkit/language.yaml").resolve()

# read_text() opens, reads and closes the file in one call; the previous
# open().read() left the file handle open until garbage collection.
print(f"\nContents of {marsrequest}:\n", marsrequest.read_text())

# The keys of the "_field" mapping in language.yaml are the field names.
fields = " ".join(yaml.safe_load(languages.read_text())["_field"].keys())
print(f"Fields defined in {languages}:\n {fields}")

Using fdb5 shared library from /Users/math/micromamba/envs/ionbeam/lib/libfdb5.dylib
Using metkit shared library from /Users/math/micromamba/envs/ionbeam/lib/libmetkit.dylib

Contents of /Users/math/micromamba/envs/ionbeam/share/metkit/odb/marsrequest.yaml:
 ---
PLATFORM: platform
OBSERVATION_VARIABLE: observation_variable
Fields defined in /Users/math/micromamba/envs/ionbeam/share/metkit/language.yaml:
 observation_variable platform class type stream expver dataset model repres obsgroup reportype levtype levelist param date hdate offsetdate fcmonth fcperiod time offsettime step anoffset reference number quantile domain frequency direction diagnostic iteration channel ident instrument method origin system


In [11]:
from pathlib import Path
from ionbeam.core.config_parser import parse_config
from ionbeam.core.bases import *
from IPython.display import display

# Location of the ionbeam configuration, with "~" expanded to the user's home.
config_file = Path("~/git/IonBeam-Deployment/config/ionbeam").expanduser()

# Keyword options forwarded unchanged to parse_config.
parse_options = {
    "config_path": "./",
    "data_path": "../data/",
    "offline": True,
    "environment": "local",
}

# parse_config's result is unpacked into the global config and the list of actions.
config, actions = parse_config(config_file, **parse_options)



In [12]:
# Sort every configured action into exactly one of three groups:
# sources, aggregators, or everything else ("stateless" actions).
sources, stateless_actions, aggregators = [], [], []
for candidate in actions:
    if isinstance(candidate, Source):
        bucket = sources
    elif isinstance(candidate, Aggregator):
        bucket = aggregators
    else:
        bucket = stateless_actions
    bucket.append(candidate)

# Show each group under its heading, using the rich notebook representation.
groups = (
    ("Sources", sources),
    ("Aggregators", aggregators),
    ("Actions", stateless_actions),
)
for heading, members in groups:
    print(heading)
    for member in members:
        display(member)

Sources


0,1
id,534dfc75-a728-4108-b51d-28a9918af68a
finish_after,
copy_metadata_to_columns,"[InputColumn(name='sensor.name', key='sensor_name', type=None, unit=None, discard=False, canonica..."
cache_version,1
use_cache,True
cache_directory,/Users/math/git/IonBeam-Deployment/data/inputs/smart_citizen_kit

name,value
source_action_id,534dfc75-a728-4108-b51d-28a9918af68a
state,raw
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


Aggregators
Actions


0,1
id,8357df04-021d-4a19-b1d6-5f43c12edac9
match,[Match(state = 'odc_encoded')]
FDB5_client_config,"{'engine': 'toc', 'spaces': [{'handler': 'Default', 'roots': [{'path': '/Users/math/git/IonBeam-D..."
debug,[]

name,value
source_action_id,8357df04-021d-4a19-b1d6-5f43c12edac9
state,written
mars_request,{}

name,value
canonical_variables,"[CanonicalVariable(name='time', unit=None, desc='The time that the observation was made.'), Canon..."
data_path,/Users/math/git/IonBeam-Deployment/data
metkit_language_template,/Users/math/git/IonBeam-Deployment/config/metkit/language.yaml.template
environment,local
fdb_schema_path,/Users/math/git/IonBeam-Deployment/config/fdb/server/custom_schema
secrets_file,/Users/math/git/IonBeam-Deployment/config/ionbeam/secrets.yaml
config_path,.
offline,True
overwrite,True
ingestion_time_constants,"IngestionTimeConstants(query_timespan=(datetime.datetime(2023, 6, 10, 0, 0, tzinfo=datetime.timez..."


In [None]:
# Toggle for where the initial message comes from:
#   True  -> pull the first message from the first configured source
#   False -> wrap a previously downloaded CSV file in a FileMessage
online = True
if online:
    source = sources[0]
    # Use `not`, never `!`: in IPython `x = !cmd` is shell-capture syntax, so
    # `= !online` would run a shell command called "online" and assign its
    # captured output instead of negating the flag.
    source.globals.offline = not online
    source.use_cache = False
    # generate() is consumed as an iterator; only its first message is taken.
    incoming_message_stream = source.generate()
    message = next(incoming_message_stream)
else:
    p = Path('/Users/math/git/IonBeam-Deployment/data/inputs/meteotracker/MeteoTracker_62ae154f1d8e11061d4474b2.csv')
    # The CSV is read once here only to record its column names as the message's variables.
    message = FileMessage(
        metadata=MetaData(state='raw',
            source='meteotracker',
            filepath= p,
            variables = list(pd.read_csv(p).columns),
            ))
message

In [None]:
from ionbeam.core.bases import FinishMessage, FileMessage, MetaData
from ionbeam.aggregators import TimeAggregator

# Every non-source action is a candidate for handling a message.
possible_actions = [a for a in actions if not isinstance(a, Source)]

# Sentinel that distinguishes "the action yielded nothing" from any real message.
_NO_OUTPUT = object()

# Messages in the order they were seen, starting with the initial one.
message_history = []
while True:
    display(message)
    message_history.append(message)
    matching = [action for action in possible_actions if action.matches(message)]

    if not matching:
        print("No more matches")
        break

    print("That matched with: ", [str(a) for a in matching])
    # Only the first matching action is applied.
    action = matching[0]

    # Special case for the TimeAggregator
    if action.__class__.__name__ == "TimeAggregator":
        # The list() call pumps the iterator to completion even if it yields nothing.
        list(action.process(message))
        # A FinishMessage is then sent and the first message it yields is taken
        # (presumably this makes the aggregator emit what it has collected -- TODO confirm).
        next_message = next(action.process(FinishMessage("We're done!")), _NO_OUTPUT)
    else:
        # Only the first message yielded by the action is followed.
        next_message = next(action.process(message), _NO_OUTPUT)

    # A bare next() on an exhausted iterator raises StopIteration; stop with a
    # readable message instead, leaving message_history intact.
    if next_message is _NO_OUTPUT:
        print(f"{action} produced no output message; stopping")
        break
    message = next_message
        

In [None]:
# Take the most recent message recorded in message_history and show its
# `data` attribute as the cell output.
m = message_history[-1]
m.data

In [None]:

# Smoke test: encode a tiny two-row frame as an ODB file in the current
# working directory, then archive the file's bytes through pyfdb.
df = pd.DataFrame({"class" : ["rd", "rd"], "col1" : [1,2], "col2" : [1,2]})
pyodc.encode_odb(df, "test.odb")

with open("test.odb", "rb") as f:
    pyfdb.archive(f.read())

# archive() alone does not guarantee the data has been written out; flush()
# is the call that makes the archived data persistent.
pyfdb.flush()