In [1]:
import streamlit as st
import polars as pl
import h5py
import numpy as np
from pathlib import Path
from abc import ABC, abstractmethod
import pandas as pd
from st_aggrid import AgGrid, GridOptionsBuilder
from typing import Any




In [None]:
class NexusDataProcessor:
    """Flatten the first ``NXentry`` group of HDF5 files into dicts / polars frames.

    The class keeps no state between calls: every method opens the file it
    is given, works on it and closes it again.
    """

    def __init__(self):
        # Placeholder attribute; no method of this class reads or writes it.
        self.data = None

    @staticmethod
    def _to_python(value: Any) -> Any:
        """Convert a value read from a dataset into a plain Python object.

        numpy arrays become (nested) lists, ``bytes``/``bytearray`` are
        decoded as UTF-8 with undecodable bytes dropped, anything else is
        returned unchanged.
        """
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, (bytes, bytearray)):
            return value.decode("utf-8", errors="ignore")  # Avoid decoding errors
        return value

    def extract_data(self, h5_obj: h5py.Group, path: str = "/", data_dict: dict | None = None) -> dict:
        """Recursively collect every dataset below ``h5_obj`` into a flat dict.

        Args:
            h5_obj: Group (or file) whose members are walked.
            path: Prefix prepended to each member name to build the dict key;
                it is expected to end with ``"/"``.
            data_dict: Dict to fill. A new one is created when ``None``; the
                same object is passed down the recursion and returned.

        Returns:
            ``data_dict`` mapping the full path of each dataset to its
            converted value (see ``_to_python``). If reading or converting a
            dataset raises, the value is the string ``"Error: <exception>"``
            instead, so a single unreadable dataset does not abort the walk.
        """
        if data_dict is None:
            data_dict = {}

        for key in h5_obj.keys():
            full_path = f"{path}{key}"
            item = h5_obj[key]

            if isinstance(item, h5py.Group):
                self.extract_data(item, full_path + "/", data_dict)
            elif isinstance(item, h5py.Dataset):
                try:
                    data_dict[full_path] = self._to_python(item[()])
                except Exception as e:
                    # Deliberate best-effort: record the failure in place of the value.
                    data_dict[full_path] = f"Error: {e}"

        return data_dict

    def find_nxentry(self, h5_obj, path="/") -> tuple[h5py.Group | None, str | None]:
        """Recursively find the NXentry group dynamically.

        Performs a depth-first search and returns the first group whose
        ``NX_class`` attribute equals ``"NXentry"`` (as ``str`` or ``bytes``).

        Returns:
            ``(group, full_path)`` for the first match, or ``(None, None)``
            when no such group exists below ``h5_obj``.
        """
        for key in h5_obj.keys():
            full_path = f"{path}{key}"
            item = h5_obj[key]

            if isinstance(item, h5py.Group):
                if item.attrs.get("NX_class") in [b"NXentry", "NXentry"]:
                    print(f"Found NXentry: {full_path}")
                    return item, full_path
                # Recursively search in sub-groups
                result = self.find_nxentry(item, full_path + "/")
                # Compare against None explicitly rather than relying on the
                # truthiness of an h5py object.
                if result[0] is not None:
                    return result

        return None, None

    def process_single_file(self, file_path: Path) -> dict:
        """Flatten the NXentry group of one file into a dict.

        Args:
            file_path: Location of the HDF5 file; ``str`` is accepted as well
                as ``Path``.

        Returns:
            The dict produced by ``extract_data`` for the NXentry group, plus
            a ``"filename"`` entry holding the file's base name.

        Raises:
            ValueError: If the file contains no NXentry group.
        """
        file_path = Path(file_path)  # allow plain strings; ``.name`` is used below
        # The file is opened read-only, so there is nothing to flush.
        with h5py.File(file_path, "r") as f:
            nxentry_group, nxentry_path = self.find_nxentry(f)
            if nxentry_group is None:
                raise ValueError("No NXentry found in file. Ensure the file is correctly structured.")

            data_dict = self.extract_data(nxentry_group, nxentry_path + "/")

        data_dict["filename"] = file_path.name
        return data_dict

    def process_multiple_files(self, file_paths: list) -> pl.DataFrame:
        """Process each file with ``process_single_file`` and stack the dicts.

        Returns:
            A polars DataFrame built from the list of per-file dicts (one
            row per file).
        """
        all_data = [self.process_single_file(fp) for fp in file_paths]
        return pl.DataFrame(all_data)

    def extract_time_series(self, file_path: Path, df: pl.DataFrame) -> pl.DataFrame:
        """Add a ``time_series_calc`` column computed as ``start_time + epoch``.

        ``start_time``, ``end_time`` and ``data/epoch`` are read from the
        NXentry group of ``file_path``. For each of the three, a column of
        that bare name in ``df`` takes precedence (first row is used);
        otherwise the value read from the file is used. The frames built by
        ``process_single_file`` use full-path keys, so for them the file
        values are what gets used.

        Args:
            file_path: HDF5 file to read the timing datasets from.
            df: Frame to extend.

        Returns:
            ``df`` with a ``time_series_calc`` column added, or ``df``
            unchanged when none of the three datasets exists in the file,
            when ``start_time`` or ``epoch`` is unavailable, or when
            ``epoch`` is empty.

        Raises:
            ValueError: If the file contains no NXentry group.
            TypeError: If ``start_time`` is text and therefore cannot be
                added to the epoch offsets.
            AssertionError: If ``end_time`` is available and differs from
                the last computed value.
        """
        with h5py.File(file_path, "r") as f:
            nxentry_group, _ = self.find_nxentry(f)
            if nxentry_group is None:
                raise ValueError("No NXentry found in file.")

            # Values are converted while the file is still open.
            time_data = {}
            if "start_time" in nxentry_group:
                time_data["start_time"] = self._to_python(nxentry_group["start_time"][()])
            if "end_time" in nxentry_group:
                time_data["end_time"] = self._to_python(nxentry_group["end_time"][()])
            if "data" in nxentry_group and "epoch" in nxentry_group["data"]:
                time_data["epoch"] = self._to_python(nxentry_group["data"]["epoch"][()])

        if not time_data:
            return df

        def _first_value(name: str) -> Any:
            # Prefer an existing df column; fall back to the value from the file
            # instead of failing on a column that was never created.
            if name in df.columns and df.height > 0:
                return df[name].to_list()[0]
            return time_data.get(name)

        start_time = _first_value("start_time")
        end_time = _first_value("end_time")
        epoch = _first_value("epoch")

        if start_time is None or epoch is None:
            return df

        if isinstance(start_time, (str, bytes)):
            # NOTE(review): timestamps stored as text would have to be parsed
            # before offsets can be added; the intended conversion is not
            # visible here, so fail with an explicit message.
            raise TypeError(
                f"start_time must be numeric to be added to epoch values, got {type(start_time).__name__}"
            )

        time_series_calc = [start_time + t for t in epoch]
        if not time_series_calc:
            # Empty epoch: nothing to add and no last element to validate.
            return df

        # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
        if end_time is not None and time_series_calc[-1] != end_time:
            raise AssertionError("Calculated time series does not match end_time!")

        if df.height == 1 and len(time_series_calc) != 1:
            # One row per file: store the whole series as a single list-valued
            # cell so its length matches the frame height.
            column = pl.Series("time_series_calc", [time_series_calc])
        else:
            column = pl.Series("time_series_calc", time_series_calc)

        return df.with_columns(column)


    
def list_groups(h5_obj, path="/"):
    """Print each group found below ``h5_obj`` along with its attributes.

    Members are visited in the order ``h5_obj.keys()`` yields them. A group
    is printed before its own sub-groups; members that are not groups are
    skipped. ``path`` is the prefix used to build the printed path.
    """
    for name in h5_obj.keys():
        member_path = f"{path}{name}"
        member = h5_obj[name]

        if not isinstance(member, h5py.Group):
            continue

        attributes = dict(member.attrs)
        print(f"Group: {member_path}, Attributes: {attributes}")
        # Descend with a trailing slash so child names are appended cleanly.
        list_groups(member, member_path + "/")

# Define file path
# NOTE(review): absolute, user-specific location; this cell only runs where
# that file exists. Consider deriving it from a configurable data directory.
file_path = Path("/Users/lotzegud/P08/fio_nxs_and_cmd_tool/nai_250mm_02348.nxs")

# Print every group in the file with its attributes. The handle is opened
# read-only, so no flush is needed before it is closed.
with h5py.File(file_path, "r") as f:
    list_groups(f)

# Initialize processor
processor = NexusDataProcessor()

# Process a single file
data_dict = processor.process_single_file(file_path)

# Print extracted data keys
print("\nExtracted Data Keys:", list(data_dict.keys()))

# Wrap the dict in a list so the frame gets a single row for this file
df = pl.DataFrame([data_dict])
print(df.shape)
print(df.schema)
display(df)






Group: /nexus_logs, Attributes: {}
Group: /nexus_logs/configuration, Attributes: {}
Group: /scan, Attributes: {'NX_class': 'NXentry', 'default': 'data'}
Group: /scan/apd, Attributes: {'NX_class': 'NXmonitor'}
Group: /scan/apd2, Attributes: {'NX_class': 'NXmonitor'}
Group: /scan/bpm1, Attributes: {'NX_class': 'NXmonitor'}
Group: /scan/bpm1/attenuator, Attributes: {'NX_class': 'NXcollection'}
Group: /scan/bpm2, Attributes: {'NX_class': 'NXmonitor'}
Group: /scan/bpm2/attenuator, Attributes: {'NX_class': 'NXcollection'}
Group: /scan/bpm3, Attributes: {'NX_class': 'NXmonitor'}
Group: /scan/bpm3/attenuator, Attributes: {'NX_class': 'NXcollection'}
Group: /scan/bpm4, Attributes: {'NX_class': 'NXmonitor'}
Group: /scan/bpm4/attenuator, Attributes: {'NX_class': 'NXcollection'}
Group: /scan/data, Attributes: {'NX_class': 'NXdata', 'axes': array(['exp_dmy01', '.'], dtype=object), 'signal': 'amptek_spectrum'}
Group: /scan/instrument, Attributes: {'NX_class': 'NXinstrument'}
Group: /scan/instrument/

/scan/apd/data,/scan/apd/mode,/scan/apd2/data,/scan/apd2/mode,/scan/bpm1/attenuator/foilpos,/scan/bpm1/attenuator/type,/scan/bpm1/depends_on,/scan/bpm1/mode,/scan/bpm2/attenuator/foilpos,/scan/bpm2/attenuator/type,/scan/bpm2/depends_on,/scan/bpm2/mode,/scan/bpm3/attenuator/foilpos,/scan/bpm3/attenuator/type,/scan/bpm3/depends_on,/scan/bpm3/mode,/scan/bpm4/attenuator/foilpos,/scan/bpm4/attenuator/type,/scan/bpm4/depends_on,/scan/bpm4/mode,/scan/data/alpha_pos,/scan/data/amptek_roi1,/scan/data/amptek_roi2,/scan/data/amptek_roi3,/scan/data/amptek_roi4,/scan/data/amptek_spectrum,/scan/data/apd,/scan/data/apd2,/scan/data/beta_pos,/scan/data/data,/scan/data/epoch,/scan/data/exp_dmy01,/scan/data/exp_t01,/scan/data/ion1,/scan/data/ion2,/scan/data/ion_bl,/scan/data/lom_foil,…,/scan/instrument/table_a/depends_on,/scan/instrument/table_a/transformation/h1,/scan/instrument/table_a/transformation/h2,/scan/instrument/table_a/transformation/vc,/scan/instrument/table_a/transformation/vl,/scan/instrument/table_a/transformation/vr,/scan/instrument/table_b/depends_on,/scan/instrument/table_b/transformation/h1,/scan/instrument/table_b/transformation/h2,/scan/instrument/table_b/transformation/trans,/scan/instrument/table_b/transformation/vc,/scan/instrument/table_b/transformation/vl,/scan/instrument/table_b/transformation/vr,/scan/ion1/data,/scan/ion1/mode,/scan/ion2/data,/scan/ion2/mode,/scan/ion_bl/data,/scan/ion_bl/mode,/scan/lom_foil_vfc/data,/scan/lom_foil_vfc/mode,/scan/program_name,/scan/sample/chemical_formula,/scan/sample/depends_on,/scan/sample/lisa_sample_stage/goniy,/scan/sample/lisa_sample_stage/salign,/scan/sample/lisa_sample_stage/sphi,/scan/sample/lisa_sample_stage/sth,/scan/sample/lisa_sample_stage/sx,/scan/sample/lisa_sample_stage/sy,/scan/sample/lisa_sample_stage/sz,/scan/sample/name,/scan/sample/transformations/goni_l,/scan/sample/transformations/goni_u,/scan/start_time,/scan/title,filename
list[f64],str,list[f64],str,list[f64],str,str,str,list[f64],str,str,str,list[f64],str,str,str,list[f64],str,str,str,list[f64],list[f64],list[f64],list[f64],list[f64],list[list[f64]],list[f64],list[f64],list[f64],list[list[f64]],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],…,str,list[f64],list[f64],list[f64],list[f64],list[f64],str,list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],str,list[f64],str,list[f64],str,list[f64],str,str,str,str,list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],str,list[f64],list[f64],str,str,str
"[173383.0, 173175.0, … 174674.0]","""monitor""","[173383.0, 173175.0, … 174674.0]","""monitor""",[22.0],"""Ni""","""transformations/x""","""monitor""",[44.0],"""Ti""","""transformations/x""","""monitor""",[40.0],"""Ti""","""transformations/x""","""monitor""",[43.0],"""Ti""","""transformations/x""","""monitor""","[0.060444, 0.060444, … 0.060445]","[1335.0, 1363.0, … 1745.0]","[111.0, 126.0, … 133.0]","[264.0, 232.0, … 248.0]","[41.0, 26.0, … 32.0]","[[0.0, 0.0, … 0.0], [0.0, 0.0, … 0.0], … [0.0, 0.0, … 1.0]]","[173383.0, 173175.0, … 174674.0]","[173383.0, 173175.0, … 174674.0]","[-0.000018, -0.000034, … -0.000034]","[[0.0, 0.0, … 0.0], [0.0, 0.0, … 0.0], … [0.0, 0.0, … 1.0]]","[1.7294e9, 1.7294e9, … 1.7294e9]","[0.0, 1.0, … 100.0]","[5.0, 5.0, … 5.0]","[92461.5, 91677.0, … 92752.0]","[41476.5, 41244.0, … 41754.0]","[210803.5, 210554.5, … 211811.5]","[0.894625, 0.892984, … 0.902573]",…,"""transformations/vc""",[-13.11],[-11.925],[-0.38],[-5.09],[-4.1428],"""transformations/vc""",[210.0],[-17.622674],[395.0],[8.634],[8.57],[103.0],"[92461.5, 91677.0, … 92752.0]","""monitor""","[41476.5, 41244.0, … 41754.0]","""monitor""","[210803.5, 210554.5, … 211811.5]","""monitor""","[449929.5, 448825.5, … 451224.0]","""monitor""","""NexDaTaS""","""n/a""","""transformations/goni_u""",[0.0],[-0.10148],[7.699988],"[0.031641, 0.031641, … 0.031641]",[5.0],[0.0],[4.24115],"""nacl""",[-0.08175],[0.025],"""2024-10-20T11:35:04.819847+020…","""PumpProbe""","""nai_250mm_02348.nxs"""
