In [1]:
from itertools import islice

import numpy as np

from ch_util import andata
from chimedb import core, dataset as ds

Connect to the database and load a file (but only the dataset listing the dataset_ids contained in it)

In [2]:
# Open the chimedb connection (presumably needed before the ds.Dataset queries below can run).
core.connect()

In [3]:
# Acquisition file to inspect.
fname = "/project/rpp-chime/chime/chime_online/20220901T034310Z_chimestack_corr/00000000_0000.h5"

# Request just the dataset ID flags rather than the full set of datasets in the file.
ad = andata.CorrData.from_acq_h5(
    fname,
    datasets=("flags/dataset_id",),
)

Print out some of the dataset IDs in the file. As the first frequency is usually missing, the dataset_ids associated with it are all in the null state (32 zeros).

In [4]:
# Peek at a small corner of the array: the first 2 entries along axis 0 and the first 5 along axis 1.
ad.flags["dataset_id"][:2, :5]

array([['00000000000000000000000000000000',
        '00000000000000000000000000000000',
        '00000000000000000000000000000000',
        '00000000000000000000000000000000',
        '00000000000000000000000000000000'],
       ['f938c2a20aa96de5bcc062ebaadf72fd',
        'f938c2a20aa96de5bcc062ebaadf72fd',
        'f938c2a20aa96de5bcc062ebaadf72fd',
        'f938c2a20aa96de5bcc062ebaadf72fd',
        'f938c2a20aa96de5bcc062ebaadf72fd']], dtype='<U33')

There are usually a lot of repeated states in a file, so you probably only want to process the unique entries. Here we'll show how to do the queries directly, but the `chimedb.dataset.utils.state_id_of_type` routine is particularly useful for this type of analysis.

In [5]:
# Collapse the whole dataset_id array down to its distinct values (np.unique returns them sorted).
unique_dataset_ids = np.unique(ad.flags["dataset_id"][:])

We don't want to use the null dataset, so we trim it off. As the output of `np.unique` is sorted, we know that the null entry (all zeros) comes first. There's also an issue with the string types numpy uses, so we need to explicitly turn them into Python strings.

In [6]:
# The null dataset ID is 32 zeros. Drop it by value rather than by position:
# this stays correct if the file happens to contain no null entries, and
# re-running the cell no longer trims off one more (real) ID each time.
# str() converts numpy's string scalars into plain Python strings.
NULL_DATASET_ID = "0" * 32
unique_dataset_ids = [
    str(u) for u in unique_dataset_ids if str(u) != NULL_DATASET_ID
]

The datasets form a chain listing all the transformations done to the data through the realtime system. Let's try to get the list of all transformations for one of the IDs. The chain is printed going from the most recent transformation to the earliest.

In [7]:
# Start from the first non-null dataset ID and follow the base_dataset links
# until the root of the chain has been printed.
dt = ds.Dataset.from_id(unique_dataset_ids[0])

at_root = False
while not at_root:
    print(f"dataset_id={dt.id}; state_id={dt.state.id}; state_type={dt.state.type.name}")

    # Only step to the parent if there is one; this leaves `dt` on the root at the end.
    at_root = bool(dt.root)
    if not at_root:
        dt = dt.base_dataset

dataset_id=05ed10f854fca62d40d6b5989c2dad7d; state_id=4dbfcd1acd2d3f7781f60144dd6b5667; state_type=eigenvalues
dataset_id=83b12f20b7e6b5c8fd4466f606de13b7; state_id=b4240cad7a2244de961a3ca3a848cf1a; state_type=stack
dataset_id=094f859a94ba19fc2034d7e07d5d8455; state_id=161c8277d3d08e97faeb0f41d2a974bb; state_type=gains
dataset_id=2245b69af56d0f35eb4fd46aa5dbcbc1; state_id=3d65f6cc04c941ad59beccbba7c5a49c; state_type=flags
dataset_id=b35fd161e72250e4801f3028deff5b54; state_id=178bd8e562838eb9b9b38549789fa42e; state_type=eigenvalues
dataset_id=2931ab110bfe32430a155c7837e72e0f; state_id=f57a844248d29dc0a1954e9b91e261c7; state_type=gating
dataset_id=a51129fd92c6d980d99754c77d430d5e; state_id=5e2a864b31621d630b313d6d2d299631; state_type=metadata
dataset_id=1396459024c927a5e4dfcbe13ae09d5f; state_id=4dbfcd1acd2d3f7781f60144dd6b5667; state_type=eigenvalues
dataset_id=592e01f52865a9a79fdb5481e631e88b; state_id=60db9f8d3e315e30b4893a1cf7ee3645; state_type=products
dataset_id=ed6878dee99abf744bc

Each of these state entries contains metadata describing exactly what the change of state was. Let's have a look at the changes to the eigenvalue state over time.

In [8]:
# Walk the same chain again, but report only the entries whose state type
# is "eigenvalues", together with the data stored in that state.
dt = ds.Dataset.from_id(unique_dataset_ids[0])

at_root = False
while not at_root:
    if dt.state.type.name == "eigenvalues":
        print(f"dataset_id={dt.id}; state_data={dt.state.data}")

    # Stop once the root has been handled, otherwise move on to the parent dataset.
    at_root = bool(dt.root)
    if not at_root:
        dt = dt.base_dataset

dataset_id=05ed10f854fca62d40d6b5989c2dad7d; state_data={'data': [], 'type': 'eigenvalues'}
dataset_id=b35fd161e72250e4801f3028deff5b54; state_data={'data': [0, 1, 2, 3], 'type': 'eigenvalues'}
dataset_id=1396459024c927a5e4dfcbe13ae09d5f; state_data={'data': [], 'type': 'eigenvalues'}


We can read this as saying that (from bottom to top) the system started with zero eigenvalues saved into the data, that at a later stage the first four eigenvalues were calculated and added into the data, and then at an even later stage, the eigenvalues were removed (empty list).

Let's try to find all the frequency maps used in this file. We do that by fetching the full dataset object for each ID and then looking for the most recent `"f_engine_frequency_map"` type in the chain of dataset IDs. We'll save a dictionary mapping the state IDs to the frequency map data. This is sufficient, as each state ID maps one-to-one to a state type and its associated data.

While we're at it, we'll also extract the F-engine config via the `config_pychfpga.fpga_master` type.

In [9]:
# Both dictionaries are keyed by state ID, so dataset IDs that share a state
# collapse onto a single entry.
fmaps = {}
fconf = {}

for dsid in unique_dataset_ids:
    # Look up the dataset for *this* ID. The loop used to fetch
    # unique_dataset_ids[0] on every iteration, so only the first dataset ID
    # was ever inspected, however many IDs the file contained.
    d = ds.Dataset.from_id(dsid)

    # Most recent frequency-map state in this dataset's ancestry.
    ancestor = d.closest_ancestor_of_type("f_engine_frequency_map")
    state = ancestor.state
    fmaps[state.id] = state.data

    # Most recent F-engine config state in this dataset's ancestry.
    ancestor = d.closest_ancestor_of_type("config_pychfpga.fpga_master")
    state = ancestor.state
    fconf[state.id] = state.data

In [10]:
# Display the state IDs collected for the frequency maps and for the F-engine configs.
(list(fmaps), list(fconf))

(['7214cc53dfdd6b74d46318ed85786bd9'], ['6fd4caef8bfef32e1a4d258f8ea3cb9a'])

Great, we found only one corresponding state. That is, even though we found several different dataset_ids in the file, they all share the same F-engine frequency map (as we would expect, since the map isn't changed very often).

Let's look at some of the data, in this case the first 10 entries in the map

In [11]:
# Take the first 10 (stream_id, frequency ID list) pairs of the first frequency map.
# islice stops after 10 items instead of enumerating the whole mapping.
fmap = list(fmaps.values())[0]["fmap"]
first_10_maps = dict(islice(fmap.items(), 10))
first_10_maps

{'12288': [185, 265, 281, 289],
 '12289': [193, 305, 329, 761],
 '12290': [121, 129, 137, 169],
 '12291': [313, 321, 361, 417],
 '12292': [201, 337, 425, 489],
 '12293': [81, 393, 401, 409],
 '12294': [113, 145, 153, 177],
 '12295': [1, 369, 473, 481],
 '12304': [524, 620, 692, 732],
 '12305': [100, 540, 612, 924]}

Juan assures me that the string keys correspond to the stream IDs and that they can be decoded like this

In [12]:
# stream_id = shuffle*2**12 + crate*2**8 + slot*2**4 + link
def decode_stream_id(stream_id: str) -> tuple[int, int, int, int]:
    """Split a stream ID into its four 4-bit fields.

    The ID is packed as
    ``shuffle * 2**12 + crate * 2**8 + slot * 2**4 + link``.

    Parameters
    ----------
    stream_id
        The stream ID as a decimal string (anything ``int()`` accepts works).

    Returns
    -------
    shuffle, crate, slot, link
        The four fields, each masked to 4 bits (0-15).
    """
    stream_id = int(stream_id)

    # Peel off one 4-bit field at a time, starting from the least significant.
    link = stream_id & 15
    slot = (stream_id >> 4) & 15
    crate = (stream_id >> 8) & 15
    shuffle = (stream_id >> 12) & 15

    return (shuffle, crate, slot, link)

In [13]:
# Show each stream ID as its decoded (shuffle, crate, slot, link) tuple next to its frequency IDs.
for stream_id, freq_id_map in first_10_maps.items():
    shuffle_crate_slot_link = decode_stream_id(stream_id)
    print(shuffle_crate_slot_link, freq_id_map)

(3, 0, 0, 0) [185, 265, 281, 289]
(3, 0, 0, 1) [193, 305, 329, 761]
(3, 0, 0, 2) [121, 129, 137, 169]
(3, 0, 0, 3) [313, 321, 361, 417]
(3, 0, 0, 4) [201, 337, 425, 489]
(3, 0, 0, 5) [81, 393, 401, 409]
(3, 0, 0, 6) [113, 145, 153, 177]
(3, 0, 0, 7) [1, 369, 473, 481]
(3, 0, 1, 0) [524, 620, 692, 732]
(3, 0, 1, 1) [100, 540, 612, 924]


For interest here is the serialised F-engine config obtained via the dataset states

In [14]:
# Display the state data of the first entry stored in `fconf`.
list(fconf.values())[0]

{'input_reorder': [[128, 0, 'FCC000812'],
  [129, 1, 'FCC000813'],
  [130, 2, 'FCC000814'],
  [131, 3, 'FCC000815'],
  [132, 4, 'FCC000808'],
  [133, 5, 'FCC000809'],
  [134, 6, 'FCC000810'],
  [135, 7, 'FCC000811'],
  [136, 8, 'FCC000804'],
  [137, 9, 'FCC000805'],
  [138, 10, 'FCC000806'],
  [139, 11, 'FCC000807'],
  [140, 12, 'FCC000800'],
  [141, 13, 'FCC000801'],
  [142, 14, 'FCC000802'],
  [143, 15, 'FCC000803'],
  [160, 16, 'FCC001012'],
  [161, 17, 'FCC001013'],
  [162, 18, 'FCC001014'],
  [163, 19, 'FCC001015'],
  [164, 20, 'FCC001008'],
  [165, 21, 'FCC001009'],
  [166, 22, 'FCC001010'],
  [167, 23, 'FCC001011'],
  [168, 24, 'FCC001004'],
  [169, 25, 'FCC001005'],
  [170, 26, 'FCC001006'],
  [171, 27, 'FCC001007'],
  [172, 28, 'FCC001000'],
  [173, 29, 'FCC001001'],
  [174, 30, 'FCC001002'],
  [175, 31, 'FCC001003'],
  [192, 32, 'FCC001212'],
  [193, 33, 'FCC001213'],
  [194, 34, 'FCC001214'],
  [195, 35, 'FCC001215'],
  [196, 36, 'FCC001208'],
  [197, 37, 'FCC001209'],
  [19