# Strax raw data check

In [1]:
import glob
from tqdm import tqdm
import os 
import os.path as osp

import strax
import straxen

# Directory that holds the strax-converted raw data folders to be checked.
dirname = '/dali/lgrandi/xenon1t/strax_converted/raw'

# Build a context on top of straxen's common options, with `check_available`
# overridden to list raw_records only.
st = strax.Context(
    storage=strax.DataDirectory(dirname),
    **{**straxen.contexts.common_opts,
       'check_available': ('raw_records',)})
# Register the plugin that provides records converted from pax.
# Keep this as the last expression: its return value is the cell output.
st.register(straxen.plugins.pax_interface.RecordsFromPax)

straxen.plugins.pax_interface.RecordsFromPax

The next cell will run for some time on a large directory over a network filesystem. It shows a progress bar, but the bar does not advance until the check is done.

In [2]:
# Simple check: names of all runs for which the context reports
# raw_records as available.
ok_runs = (
    st.select_runs(available='raw_records')['name']
    .values
    .tolist()
)
len(ok_runs)

Checking data availability: 100%|██████████| 1/1 [00:10<00:00, 10.62s/it]


216

In [3]:
# Detailed check: walk every entry in `dirname`, print the reason for each
# entry that fails a check, and collect the passing folders in `checked_ok`.

# Lineage hash the current context expects for raw_records.
# NOTE(review): run id '0' looks like a placeholder; this presumably relies on
# the lineage hash being independent of the run id -- confirm.
rr_hash = st.key_for('0', 'raw_records').lineage_hash

# Skip entries whose *name* contains 'meta'. Matching on the basename (rather
# than the full path) prevents a 'meta' substring in `dirname` itself from
# silently excluding every entry.
all_dirs = set(d for d in glob.glob(dirname + '/*')
               if 'meta' not in osp.basename(d))
checked_ok = set()

for q in tqdm(all_dirs):
    if not osp.isdir(q):
        # Still counted as failing below, so report why.
        print(f"{q}: Not a directory")
        continue
    try:
        _run_id, _data_type, _hash = st.storage[0]._parse_folder_name(q)
    except strax.storage.files.InvalidFolderNameFormat:
        print(f"{q}: Invalid or non-strax folder name")
        continue

    if _data_type != 'raw_records':
        print(f"{q}: Non-raw data type")
        continue

    if _hash != rr_hash:
        print(f"{q}: Out of date or invalid hash")
        continue

    try:
        # Metadata is looked up by run id through the context, not by path.
        # NOTE(review): presumably resolves to this folder since the hash
        # matches -- confirm if several storage locations are configured.
        md = st.get_metadata(_run_id, 'raw_records')
    except strax.DataCorrupted:
        print(f"{q}: Exception while loading metadata")
        continue

    if 'exception' in md:
        print(f"{q}: Metadata contains exception (crash during conversion)")
        continue
    if 'writing_ended' not in md:
        print(f"{q}: Metadata indicates conversion never finished.")
        continue

    # Also reject an empty chunk list: indexing chunk 0 below would otherwise
    # raise IndexError and abort the whole scan.
    if 'chunks' not in md or not md['chunks']:
        print(f"{q}: No chunks registered in metadata")
        continue

    if 'lineage_hash' not in md:
        print(f"{q}: Lineage hash missing from metadata")
        continue

    # Cheap sanity check on the data itself: the first chunk file must exist
    # and be non-empty.
    fn = osp.join(q, md['chunks'][0]['filename'])
    if not osp.exists(fn) or osp.getsize(fn) == 0:
        print(f"{q}: First chunk file is non-existent or empty")
        continue

    # If you want to be really really sure the data is ok, you can try to load the data
    # try:
    #     rr = st.get_array(_run_id, 'raw_records', seconds_range=(0, 0.1))
    # except Exception as e:
    #     print(f"{q}: Exception while actually loading data: {e}")
    #     continue

    checked_ok.add(q)

# Everything that was scanned but did not pass every check.
failing = all_dirs - checked_ok
print(f"Found {len(failing)} bad and {len(checked_ok)} good data folders out of {len(all_dirs)}")

100%|██████████| 217/217 [00:01<00:00, 118.57it/s]

Found 0 bad and 217 good data folders out of 217



