In [27]:
from construct import *

# Version information packed as four consecutive single-byte fields
# (4 bytes in total), parsed in the order listed.
format_word = Struct(
    *(
        field_name / Byte
        for field_name in (
            "daq_major",
            "daq_minor",
            "data_format_major",
            "data_format_minor",
        )
    )
)

# File header: one little-endian unsigned 32-bit endian indicator followed by
# the four-byte format_word holding the DAQ / data-format version numbers.
two_word_file_header = Struct("endian_indicator" / Int32ul, "data_format" / format_word)

# Detector configuration header: two 32-bit words read from the stream plus
# one derived value.
detector_config_header = Struct(
    "header_number" / Int32ul,
    "config_record_len" / Int32ul,
    # Not read from the stream: derived from config_record_len with integer
    # division. Other structs in this file use it as an Array count
    # (via this._root.detector_hdr.repeat_value).
    # NOTE(review): the meaning of the divisors 72 and 144 is not visible
    # here -- confirm against the format specification.
    "repeat_value" / Computed(
        this.config_record_len // 72 + this.config_record_len // 144
    ),
)

# Charge-channel configuration: an unsigned length word followed by eight
# signed little-endian 32-bit settings, parsed in the order listed.
charge_config_header = Struct(
    "charge_config_len" / Int32ul,
    *(
        setting_name / Int32sl
        for setting_name in (
            "detector_code",
            "tower_number",
            "channel_post_amp",
            "channel_bias",
            "rtf_offset",
            "delta_t",
            "trigger_time",
            "trace_len",
        )
    ),
)

# Phonon-channel configuration: an unsigned length word followed by eleven
# signed little-endian 32-bit settings, parsed in the order listed.
phonon_config_header = Struct(
    "phonon_config_len" / Int32ul,
    *(
        setting_name / Int32sl
        for setting_name in (
            "detector_code",
            "tower_number",
            "post_amp_gain",
            "qet_bias",
            "squid_bias",
            "squid_lockpoint",
            "rtf_offset",
            "variable_gain",
            "delta_t",
            "trigger_time",
            "trace_len",
        )
    ),
)

# One configuration header entry. The leading header_number selects which
# body follows: 0x10002 -> charge_config_header, 0x10001 -> phonon_config_header.
# The field that does not match is left as None, and for any other
# header_number no body is consumed at all.
header_list = Struct(
    "header_number" / Int32ul,
    "charge_config" / If(this.header_number == 0x10002, charge_config_header),
    "phonon_config" / If(this.header_number == 0x10001, phonon_config_header),
)

# Event header: two 32-bit words from the stream, plus four values unpacked
# from bit fields of event_header_word (none of them consume extra bytes).
event_header = Struct(
    "event_header_word" / Int32ul,
    "event_size" / Int32ul,
    # bits 16-31
    "event_identifier" / Computed((this.event_header_word >> 16) & 0xFFFF),
    # bits 8-11
    "event_class" / Computed((this.event_header_word >> 8) & 0xF),
    # bits 12-15
    "event_category" / Computed((this.event_header_word >> 12) & 0xF),
    # bits 0-7
    "event_type" / Computed(this.event_header_word & 0xFF),
)

# Administrative record: eight little-endian unsigned 32-bit words, parsed in
# the order listed.
administrative_record = Struct(
    *(
        word_name / Int32ul
        for word_name in (
            "admin_header",
            "admin_len",
            "series_number_1",
            "series_number_2",
            "event_number_in_series",
            "seconds_from_epoch",
            "time_from_last_event",
            "live_time_from_last_event",
        )
    )
)

# Bookkeeping words that precede the samples of one trace: fourteen
# little-endian unsigned 32-bit words, parsed in the order listed.
# The final word, num_samples, is what trace_data uses to size its sample array.
trace_record = Struct(
    *(
        word_name / Int32ul
        for word_name in (
            "trace_header",
            "trace_len",
            "trace_bookkeeping_header",
            "bookkeeping_len",
            "digitizer_base_address",
            "digitizer_channel",
            "detector_code",
            "timebase_header",
            "timebase_len",
            "t0_in_ns",
            "delta_t_ns",
            "num_of_points",
            "second_trace_header",
            "num_samples",
        )
    )
)

# One 32-bit word carrying two 16-bit samples.
# sample_a is the upper half (bits 16-31), sample_b the lower half (bits 0-15).
data_sample = Struct(
    "data_selection" / Int32ul,
    "sample_a" / Computed((this.data_selection >> 16) & 0xFFFF),
    "sample_b" / Computed(this.data_selection & 0xFFFF),
)


# A complete trace: the bookkeeping words followed by the packed samples.
# Every data_sample word holds two samples, hence num_samples // 2 words.
trace_data = Struct(
    "trace_rcrds" / trace_record,
    "sample_data" / Array(this.trace_rcrds.num_samples // 2, data_sample),
)

# History buffer record. Each count word sizes the array(s) that follow it;
# the two mask arrays hold (number of times) x (mask words per time) entries.
soudan_history_buffer = Struct(
    "history_buffer_header" / Int32ul,
    "history_buffer_len" / Int32ul,
    "num_time_nvt" / Int32ul,
    "time_nvt" / Array(this.num_time_nvt, Int32ul),
    "num_veto_mask_words" / Int32ul,
    "time_n_minus_veto_mask" / Array(this.num_time_nvt * this.num_veto_mask_words, Int32ul),
    "num_trigger_times" / Int32ul,
    "trigger_times" / Array(this.num_trigger_times, Int32ul),
    "num_trigger_mask_words" / Int32ul,
    "trig_times_minus_trig_mask" / Array(this.num_trigger_times * this.num_trigger_mask_words, Int32ul),
)

# Trigger record: header, length and trigger time words followed by a fixed
# block of six 32-bit trigger mask words.
trigger_record = Struct(
    "trigger_header" / Int32ul,
    "trigger_len" / Int32ul,
    "trigger_time" / Int32ul,
    "individual_trigger_masks" / Array(6, Int32ul),
)

# TLB trigger mask record: header and length words followed by a fixed block
# of six 32-bit tower mask words.
tlb_trigger_mask_record = Struct(
    "tlb_mask_header" / Int32ul,
    "tlb_len" / Int32ul,
    "tower_mask" / Array(6, Int32ul),
)

# GPS record: a header word and a length word, then three 32-bit GPS words
# that are only present in the stream when length > 0 (otherwise each of the
# three fields parses to None without consuming any bytes).
# NOTE(review): the header field is named "tlb_mask_header", the same name
# used in tlb_trigger_mask_record -- possibly a copy-paste leftover. It is
# kept as-is because parse_file reads it under this name.
gps_data = Struct(
    "tlb_mask_header" / Int32ul,
    "length" / Int32ul,
    *(
        gps_word / If(this.length > 0, Int32ul)
        for gps_word in (
            "gps_year_day",
            "gps_status_hour_minute_second",
            "gps_microsecs_from_gps_second",
        )
    ),
)

# Detector trigger threshold record: six scalar words followed by three
# fixed-size arrays (6 detector codes, 9 operation codes, 54 ADC values).
# Note 54 == 6 * 9.
# NOTE(review): presumably one ADC value per (detector, operation) pair -- confirm.
detector_trigger_threshold_data = Struct(
    "threshold_header" / Int32ul,
    "len_to_next_header" / Int32ul,
    "minimum_voltage_level" / Int32ul,
    "maximum_voltage_level" / Int32ul,
    "dynamic_range" / Int32ul,
    "tower_number" / Int32ul,
    "detector_codes" / Array(6, Int32ul),
    "operations_codes" / Array(9, Int32ul),
    "adc_values" / Array(54, Int32ul),
)

# Detector trigger rate record: four scalar words followed by three
# fixed-size arrays (6 detector codes, 5 J codes, 30 counter values).
# Note 30 == 6 * 5.
# NOTE(review): presumably one counter per (detector, J code) pair -- confirm.
detector_trigger_rates = Struct(
    "detector_trigger_header" / Int32ul,
    "len_to_next_header" / Int32ul,
    "clocking_interval" / Int32ul,
    "tower_number" / Int32ul,
    "detector_codes" / Array(6, Int32ul),
    "j_codes" / Array(5, Int32ul),
    "counter_values" / Array(30, Int32ul),
)

# Veto trigger rate record: four scalar words, then two arrays that each hold
# num_entries 32-bit words (all detector codes first, then all counter values).
veto_trigger_rates = Struct(
    "veto_trigger_header" / Int32ul,
    "len_to_next_header" / Int32ul,
    "clocking_interval" / Int32ul,
    "num_entries" / Int32ul,
    "detector_code" / Array(this.num_entries, Int32ul),
    "counter_value_det_code" / Array(this.num_entries, Int32ul),
)

# Trying to generalize the logical records type.
# Experimental, header-driven layout: after the event header, six sections are
# read. For each one the next 32-bit word is peeked (not consumed) and used to
# pick the record struct, which then parses that same word as its own header.
# detector_trigger_threshold_data has no entry in the dispatch table.
logical_records_unused = Struct(
    "event_hdr" / event_header,
    "next_section" / Array(6, Struct(
        "next_header" / Peek(Int32ul),
        "section" / Switch(this.next_header, {
            0x00000002: administrative_record,
            0x00000011: trace_data,
            0x00000021: soudan_history_buffer,
            0x00000022: detector_trigger_rates,
            0x00000031: veto_trigger_rates,
            0x00000060: gps_data,
            0x00000080: trigger_record,
            0x00000081: tlb_trigger_mask_record,
        }),
    )),
)

# One event ("logical record") with its sub-records in a fixed order.
# The number of traces comes from the enclosing file struct, so this struct
# only parses when the root context has detector_hdr.repeat_value.
# Not parsed yet (fields left disabled):
#   "detector_threshold_data" / detector_trigger_threshold_data
#   "detector_trig_rates" / detector_trigger_rates
#   "veto_trig_rates" / veto_trigger_rates
logical_records = Struct(
    "event_hdr" / event_header,
    "admin_rcrd" / administrative_record,
    "trigger_rcrd" / trigger_record,
    "tlb_trig_mask_rcrd" / tlb_trigger_mask_record,
    "gps_data" / gps_data,
    "trace_data" / Array(this._root.detector_hdr.repeat_value, trace_data),
    "soudan_buffer" / soudan_history_buffer,
)

In [28]:
# Whole-file layout: file header, detector configuration header,
# repeat_value configuration headers, then as many logical records as can be
# parsed from the remaining bytes (GreedyRange stops at the first failure).
soudan = Struct(
    "file_hdr" / two_word_file_header,
    "detector_hdr" / detector_config_header,
    "hdrs" / Array(this._root.detector_hdr.repeat_value, header_list),
    "logical_rcrds" / GreedyRange(logical_records),
)

# Same layout as the full-file struct, but reads exactly two logical records
# instead of greedily consuming the rest of the stream.
test = Struct(
    "file_hdr" / two_word_file_header,
    "detector_hdr" / detector_config_header,
    "hdrs" / Array(this._root.detector_hdr.repeat_value, header_list),
    "logical_rcrds" / Array(2, logical_records),
)

In [36]:
import h5py


def _write_fields(group, record, field_names):
    """Store each named field of ``record`` as a same-named dataset in ``group``.

    Args:
        group: h5py group (any object with ``create_dataset(name, data=...)``).
        record: Parsed container whose fields are readable as attributes.
        field_names: Iterable of field names to copy.
    """
    for field_name in field_names:
        group.create_dataset(field_name, data=getattr(record, field_name))


def _unpack_trace(trace):
    """Return the samples of one parsed ``trace_data`` entry as a flat list.

    Every ``data_sample`` word contributes ``sample_a`` followed by ``sample_b``.
    """
    return [
        sample
        for word in trace.sample_data
        for sample in (word.sample_a, word.sample_b)
    ]


def parse_file(input_path, output_path):
    """Parse a raw data file with the ``soudan`` struct and export it to HDF5.

    Layout of the output file:

    * ``file_hdr`` / ``detector_hdr``: scalar datasets, one per header field.
    * ``hdrs/header_{i}``: ``header_number`` plus a ``charge_config_{i}`` or
      ``phonon_config_{i}`` sub-group, depending on the header number.
    * ``logical_rcrds/<kind>_list/<kind>_{i}``: one group per event for the
      event header, admin, trigger, TLB mask and GPS records. The three GPS
      payload words are only written when the record's ``length`` is non-zero
      (they parse to None otherwise, which HDF5 cannot store).
    * ``logical_rcrds/pulse_data/traces``: every trace of every event, one row
      per trace, ordered by event and then by trace within the event. If the
      traces do not all have the same number of samples they cannot be stacked,
      so each one is written as its own ``trace_{k}`` dataset instead.
    * ``logical_rcrds/trace_rcrd_list`` and
      ``logical_rcrds/soudan_buffer_list/soudan_buffer_{i}`` are created but
      left empty (TODO: export trace bookkeeping and history buffer contents).

    Args:
        input_path: Path of the raw binary file to read.
        output_path: Path of the HDF5 file to create (an existing file is
            overwritten).

    Raises:
        Whatever ``open``, ``soudan.parse`` or ``h5py`` raise on unreadable
        input, malformed data or an unwritable output path.
    """
    with open(input_path, 'rb') as raw_file:
        parsed_data = soudan.parse(raw_file.read())

    with h5py.File(output_path, 'w') as h5_file:
        _write_fields(
            h5_file.create_group('file_hdr'),
            parsed_data.file_hdr.data_format,
            ('daq_major', 'daq_minor', 'data_format_major', 'data_format_minor'),
        )
        _write_fields(
            h5_file.create_group('detector_hdr'),
            parsed_data.detector_hdr,
            ('header_number', 'config_record_len', 'repeat_value'),
        )

        hdrs_grp = h5_file.create_group('hdrs')
        for i, hdr in enumerate(parsed_data.hdrs):
            header_grp = hdrs_grp.create_group(f'header_{i}')
            header_grp.create_dataset('header_number', data=hdr.header_number)

            if hdr.header_number == 0x10002:
                _write_fields(
                    header_grp.create_group(f'charge_config_{i}'),
                    hdr.charge_config,
                    (
                        'charge_config_len',
                        'detector_code',
                        'tower_number',
                        'channel_post_amp',
                        # channel_bias is parsed but was previously never exported.
                        'channel_bias',
                        'rtf_offset',
                        'delta_t',
                        'trigger_time',
                        'trace_len',
                    ),
                )

            if hdr.header_number == 0x10001:
                _write_fields(
                    header_grp.create_group(f'phonon_config_{i}'),
                    hdr.phonon_config,
                    (
                        'phonon_config_len',
                        'detector_code',
                        'tower_number',
                        'post_amp_gain',
                        'qet_bias',
                        'squid_bias',
                        'squid_lockpoint',
                        'rtf_offset',
                        'variable_gain',
                        'delta_t',
                        'trigger_time',
                        'trace_len',
                    ),
                )

        logical_rcrd_array = h5_file.create_group('logical_rcrds')
        pulse_array = logical_rcrd_array.create_group('pulse_data')
        event_hdr_array = logical_rcrd_array.create_group('event_header_list')
        admin_rcrd_array = logical_rcrd_array.create_group('admin_rcrd_list')
        trigger_rcrds_array = logical_rcrd_array.create_group('trigger_rcrd_list')
        tlb_mask_rcrds_array = logical_rcrd_array.create_group('tlb_trig_mask_rcrd_list')
        gps_data_array = logical_rcrd_array.create_group('gps_data_list')
        # Created for layout compatibility; nothing is written into it yet.
        logical_rcrd_array.create_group('trace_rcrd_list')
        soudan_buffer_array = logical_rcrd_array.create_group('soudan_buffer_list')

        # Accumulated across ALL events. Resetting this per event (and
        # appending outside the per-trace loop) used to leave only the last
        # trace of the last event in the output.
        all_traces = []

        for i, logical_record in enumerate(parsed_data.logical_rcrds):
            _write_fields(
                event_hdr_array.create_group(f'event_hdr_{i}'),
                logical_record.event_hdr,
                (
                    'event_header_word',
                    'event_size',
                    'event_identifier',
                    'event_class',
                    'event_category',
                    'event_type',
                ),
            )
            _write_fields(
                admin_rcrd_array.create_group(f'admin_rcrd_{i}'),
                logical_record.admin_rcrd,
                (
                    'admin_header',
                    'admin_len',
                    'series_number_1',
                    'series_number_2',
                    'event_number_in_series',
                    'seconds_from_epoch',
                    'time_from_last_event',
                    'live_time_from_last_event',
                ),
            )
            _write_fields(
                trigger_rcrds_array.create_group(f'trigger_rcrd_{i}'),
                logical_record.trigger_rcrd,
                ('trigger_header', 'trigger_len', 'trigger_time', 'individual_trigger_masks'),
            )
            _write_fields(
                tlb_mask_rcrds_array.create_group(f'tlb_trig_mask_rcrd_{i}'),
                logical_record.tlb_trig_mask_rcrd,
                ('tlb_mask_header', 'tlb_len', 'tower_mask'),
            )

            gps_record = logical_record.gps_data
            gps_data_grp = gps_data_array.create_group(f'gps_data_{i}')
            _write_fields(gps_data_grp, gps_record, ('tlb_mask_header', 'length'))
            # The payload words are None when length == 0, and HDF5 cannot
            # store None, so they are skipped in that case.
            if gps_record.length != 0:
                _write_fields(
                    gps_data_grp,
                    gps_record,
                    (
                        'gps_year_day',
                        'gps_status_hour_minute_second',
                        'gps_microsecs_from_gps_second',
                    ),
                )

            # Placeholder group; history buffer contents are not exported yet.
            soudan_buffer_array.create_group(f'soudan_buffer_{i}')

            all_traces.extend(_unpack_trace(trace) for trace in logical_record.trace_data)

        if len({len(samples) for samples in all_traces}) <= 1:
            # Uniform (or no) traces: one 2-D dataset, one row per trace.
            pulse_array.create_dataset('traces', data=all_traces)
        else:
            # Ragged traces cannot form a rectangular array.
            for k, samples in enumerate(all_traces):
                pulse_array.create_dataset(f'trace_{k}', data=samples)



# Raw data file to decode.
input_path = "../01120210_0727_F0114"

# HDF5 file to create. Other destinations that have been used:
#   "../parsedh5.hdf5"
#   "/data3/afisher/soudan_output/parsed.txt"   (final files, on the novateur network)
output_path = "../large_parsedh5.hdf5"

parse_file(input_path=input_path, output_path=output_path)


In [None]:
def write_trace_record(trace_rcrd_grp, trace_rcrds):
    """Copy the bookkeeping words of one parsed trace record into an HDF5 group.

    This cell used to hold a detached run of ``create_dataset`` statements
    that could not execute: mixed indentation, an undefined ``trace_rcrd_grp``,
    and ``logical_record.trace_rcrds``, which does not exist (``trace_rcrds``
    is a field of each ``logical_record.trace_data`` entry). The same
    statements are kept here as a reusable helper with explicit inputs.

    Args:
        trace_rcrd_grp: Destination h5py group (any object exposing
            ``create_dataset(name, data=...)``).
        trace_rcrds: Parsed ``trace_record`` container, e.g.
            ``logical_record.trace_data[j].trace_rcrds``.

    The same thirteen fields as the original statements are written;
    ``trace_header`` was not among them and is therefore not written either.
    """
    for field_name in (
        'trace_len',
        'trace_bookkeeping_header',
        'bookkeeping_len',
        'digitizer_base_address',
        'digitizer_channel',
        'detector_code',
        'timebase_header',
        'timebase_len',
        't0_in_ns',
        'delta_t_ns',
        'num_of_points',
        'second_trace_header',
        'num_samples',
    ):
        trace_rcrd_grp.create_dataset(field_name, data=getattr(trace_rcrds, field_name))