# Testing XML Parsing with BeautifulSoup: `acq.macaml` Files


## Setting up the Environment

In [8]:
from pathlib import Path

from bs4 import BeautifulSoup as bs

In [9]:
# Absolute path to one example acq.macaml file, kept for ad-hoc experiments.
p = Path(
    '/Users/jonathan/0_jono_data/2023-03-01_15-22-02_ACETONE_VOID-VOL-MEASUREMENT.sequence/ACETONE0002.D/acq.macaml'
)

In [10]:
def read_single_signal_xml(path):
    """Read the detector signal settings from an ``acq.macaml`` XML file.

    The children of the first ``Section`` under ``Content`` are scanned. Any
    child whose text mentions "Signal" is expected to hold a ``Table`` of
    ``Row`` elements. Each row has multiple ``Parameter`` elements, each with
    its own Name, ID, Unit and Value:

    1. whether to "use" the signal (presumably for the trace - unconfirmed),
    2. the designation of the signal as Value = 'Signal X', where X is a
       letter (A to H in the files inspected so far),
    3. the wavelength, where Unit is 'nm' and Value is the scalar value,
    4. the bandwidth, in the same form as the wavelength,
    5. whether a reference signal is used, a "Yes" or "No" value.

    Only parameters 2-4 are extracted; they are recognised by their ID text
    rather than by position.

    Args:
        path: ``pathlib.Path`` pointing at the XML file to parse.

    Returns:
        A dict mapping each signal designation (e.g. ``'Signal A'``) to
        ``{"wavelength": "<value> <unit>", "bandwidth": "<value> <unit>"}``.
        A wavelength or bandwidth that is absent from a row is stored as
        ``None``. If ``path`` is not an existing file, "bad input" is printed
        and an empty dict is returned.
    """
    # Bind the result up front so every exit path returns a dict; previously
    # the "bad input" branch fell through to an unbound name.
    single_signal_dict = {}

    if not path.is_file():
        print("bad input")
        return single_signal_dict

    with path.open() as f:
        soup = bs(f.read(), 'xml')

    for section in soup.Content.Section:
        if "Signal" not in section.text:
            continue

        for row in section.Table.find_all('Row'):
            # Reset per row so a row that lacks a parameter can never inherit
            # the values parsed from the previous row.
            signal_ID = None
            signal_wavelength = None
            signal_bandwidth = None

            for parameter in row.find_all('Parameter'):
                id_tag = parameter.find('ID')
                if id_tag is None:
                    # A parameter without an ID cannot be one of the three
                    # fields of interest.
                    continue

                if "Signals_Signal_ID" in id_tag:
                    signal_ID = parameter.Value.text

                if "Signals_Signal_Wavelength" in id_tag:
                    signal_wavelength = f"{parameter.Value.text} {parameter.Unit.text}"

                if "Signals_Signal_Bandwidth" in id_tag:
                    signal_bandwidth = f"{parameter.Value.text} {parameter.Unit.text}"

            if signal_ID is None:
                # Without a designation there is no key to store the row under.
                continue

            single_signal_dict[signal_ID] = {
                "wavelength": signal_wavelength,
                "bandwidth": signal_bandwidth,
            }

    return single_signal_dict

In [11]:
# Recursively search the data root for files whose name ends in "acq.macaml"
# and that sit directly inside a "*.D" directory. For each match, print the
# name of that directory followed by the signal settings parsed from the file.
for file in Path("/Users/jonathan/0_jono_data").rglob("*.D/*acq.macaml"):
    print(file.parent.name)  # name of the enclosing "*.D" directory
    print(read_single_signal_xml(file))

2023-02-15_COFFEE_COLUMN_CHECK.D
{'Signal A': {'wavelength': '240.0 nm', 'bandwidth': '4.0 nm'}, 'Signal B': {'wavelength': '244.0 nm', 'bandwidth': '4.0 nm'}, 'Signal C': {'wavelength': '248.0 nm', 'bandwidth': '4.0 nm'}, 'Signal D': {'wavelength': '252.0 nm', 'bandwidth': '4.0 nm'}, 'Signal E': {'wavelength': '256.0 nm', 'bandwidth': '4.0 nm'}, 'Signal F': {'wavelength': '260.0 nm', 'bandwidth': '4.0 nm'}, 'Signal G': {'wavelength': '260.0 nm', 'bandwidth': '4.0 nm'}, 'Signal H': {'wavelength': '260.0 nm', 'bandwidth': '4.0 nm'}}
2023-02-09_14-59-17_NC1.D
{'Signal A': {'wavelength': '240.0 nm', 'bandwidth': '4.0 nm'}, 'Signal B': {'wavelength': '244.0 nm', 'bandwidth': '4.0 nm'}, 'Signal C': {'wavelength': '248.0 nm', 'bandwidth': '4.0 nm'}, 'Signal D': {'wavelength': '252.0 nm', 'bandwidth': '4.0 nm'}, 'Signal E': {'wavelength': '256.0 nm', 'bandwidth': '4.0 nm'}, 'Signal F': {'wavelength': '260.0 nm', 'bandwidth': '4.0 nm'}, 'Signal G': {'wavelength': '260.0 nm', 'bandwidth': '4.0 