In [14]:
import base64
import xml.etree.ElementTree as ET
import pandas as pd
import os

In [9]:
def set_bit(v, index, x):
    """
        Return v with bit number `index` forced to 1 when x is truthy
        and forced to 0 otherwise. v itself is not modified (ints are
        immutable); the caller must use the returned value.
    """
    bit = 1 << index        # Integer with only bit 'index' set.
    cleared = v & ~bit      # Copy of v with the target bit forced to 0.
    # Re-set the bit only when a truthy x was requested.
    return (cleared | bit) if x else cleared

In [10]:
def decode_xml_file(file_path):
    """
        Parse a waveform XML file and return its contents as a DataFrame.

        For every direct <cpc> child of the root, every <mg> element found
        under each of its <device> elements produces one row. The children
        of an <mg> are matched on their 'name' attribute:

            Offset, Hz, Points -> parsed with int()
            Gain               -> parsed with float()
            Wave               -> base64 text, decoded as consecutive
                                  little-endian signed 16-bit samples

        Each sample is scaled as ``sample * gain + offset``. Children
        without a 'name' attribute, or with an unrecognised name, are
        ignored. An empty Wave element yields an empty sample list, and a
        trailing odd byte in the decoded payload is dropped.

        Parameters
        ----------
        file_path : path or file object accepted by ElementTree.parse.

        Returns
        -------
        pandas.DataFrame with columns
        ['seq', 'datetime', 'tzoffset', 'tz', 'mg_name', 'hz', 'points',
        'binwave'], where 'binwave' holds the list of scaled samples.

        Raises
        ------
        Whatever ET.parse raises for unreadable/malformed XML, and
        ValueError/TypeError from int()/float() on malformed numeric text.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    data = []

    for cpc in root.findall('cpc'):
        # Attributes are kept as the raw strings from the file (None if absent).
        seq = cpc.attrib.get('seq', None)
        timestamp = cpc.attrib.get('datetime', None)
        tzoffset = cpc.attrib.get('tzoffset', None)
        tz = cpc.attrib.get('tz', None)

        for device in cpc.findall('.//device'):
            for mg in device.findall('.//mg'):
                mg_name = mg.attrib.get('name', None)
                wave = b''
                offset = 0
                # NOTE(review): gain defaults to 0, so a group without a Gain
                # element yields samples that all equal `offset` -- confirm
                # this is intended rather than a default of 1.
                gain = 0
                points = 0
                hz = 0

                for m in mg:
                    # .get() so that children lacking a 'name' attribute are
                    # skipped instead of raising KeyError.
                    field = m.attrib.get('name')
                    if field == 'Offset':
                        offset = int(m.text)
                    elif field == 'Gain':
                        gain = float(m.text)
                    elif field == 'Hz':
                        hz = int(m.text)
                    elif field == 'Points':
                        points = int(m.text)
                    elif field == 'Wave':
                        # m.text is None for an empty element; treat as no data.
                        wave = base64.b64decode(m.text or '')

                # Scaling happens after all children are read, so the order of
                # Gain/Offset relative to Wave in the file does not matter.
                # range(..., len(wave) - 1, 2) visits only complete byte pairs.
                binwave = [
                    int.from_bytes(wave[i:i + 2], 'little', signed=True) * gain + offset
                    for i in range(0, len(wave) - 1, 2)
                ]

                data.append([seq, timestamp, tzoffset, tz,
                             mg_name, hz, points, binwave])

    df = pd.DataFrame(data, columns=['seq', 'datetime', 'tzoffset', 'tz',
                                     'mg_name', 'hz', 'points', 'binwave'])

    return df

In [11]:
def extract_waveform(file_path):
    """
        Return the decoded waveform table for the XML file at file_path.

        This simply delegates to decode_xml_file and hands its result
        back unchanged.
    """
    return decode_xml_file(file_path)

In [51]:

# Waveform XML file to decode.
# NOTE(review): hard-coded absolute Windows path; adjust for your environment.
fileName = r"H:\sml data\epic_wave_2_v2.tar\epic_wave_2_v2\UCI_deidentified_part2_EPIC_08_10\Waveforms\4c\4c1e9cec25ce278cIP-2020-02-04-02-30-01-809Z.xml"
# Decode the file into a DataFrame (one row per <mg> group found in the XML).
df = extract_waveform(fileName)
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,seq,datetime,tzoffset,tz,mg_name,hz,points,binwave
0,188734,2020-02-04T02:30:01.809Z,-07:00:00,Pacific Standard Time,GE_ECG,180,180,"[1439.0, -1229.0, 1436.0, -1229.0, 1432.0, -12..."
1,188734,2020-02-04T02:30:01.809Z,-07:00:00,Pacific Standard Time,GE_ART,180,180,"[227.0, -192.25, 225.5, -192.75, 226.25, -193...."
2,188735,2020-02-04T02:30:03.231Z,-07:00:00,Pacific Standard Time,GE_ECG,180,180,"[1442.0, -1256.0, 1446.0, -1258.0, 1448.0, -12..."
3,188735,2020-02-04T02:30:03.231Z,-07:00:00,Pacific Standard Time,GE_ART,180,180,"[231.25, -194.5, 231.0, -195.0, 231.0, -195.0,..."
4,188736,2020-02-04T02:30:04.830Z,-07:00:00,Pacific Standard Time,GE_ECG,180,180,"[1503.0, -1331.0, 1505.0, -1336.0, 1502.0, -13..."
...,...,...,...,...,...,...,...,...
2399,189931,2020-02-04T02:59:57.381Z,-07:00:00,Pacific Standard Time,GE_ART,180,180,"[-184.75, 225.0, -186.25, 225.0, -183.75, 224...."
2400,189932,2020-02-04T02:59:58.803Z,-07:00:00,Pacific Standard Time,GE_ECG,180,180,"[-1127.0, 1351.0, -1124.0, 1351.0, -1133.0, 13..."
2401,189932,2020-02-04T02:59:58.803Z,-07:00:00,Pacific Standard Time,GE_ART,180,180,"[-178.5, 215.5, -177.5, 216.5, -177.0, 217.0, ..."
2402,189933,2020-02-04T03:00:00.403Z,-07:00:00,Pacific Standard Time,GE_ECG,180,180,"[-1108.0, 1324.0, -1110.0, 1332.0, -1106.0, 13..."
