# DPA conversion to standard trace format.

In [1]:
import bz2
import math
import zipfile
import numpy as np
import tempfile
from tqdm import tqdm

from src.data.dpa import DPA4

In [2]:
# Scratch directory that receives the unpacked archives; the cells below read
# the individual trace files from it.
TMP_DIR = tempfile.mkdtemp()

# Unpack every raw archive listed in DPA4.TRACE_URLS into the scratch directory.
archive_names = list(DPA4.TRACE_URLS.keys())
for fname in tqdm(archive_names):
    archive_path = f"{DPA4.ROOT_RAW}/{fname}"
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(TMP_DIR)

TMP_DIR

  0%|          | 0/10 [00:11<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Load the first trace of the first key directory as a worked example.
example_path = f"{TMP_DIR}/DPA_contestv4_2/k00/DPACV42_000000.trc.bz2"
with bz2.open(example_path, 'rb') as compressed:
    EXAMPLE_TRACE = bytearray(compressed.read())

# Expected number of valid sample points in one trace (checked by get_trace).
TRACE_SIZE = 1704402
# NOTE(review): presumably the number of bytes preceding the samples
# (START + WAVEDESC length); not used by the cells shown here -- confirm.
HEADER_SIZE = 357

# Number of bytes in the file that are not sample data.
len(EXAMPLE_TRACE) - TRACE_SIZE


### Header offsets

Acquired from [the DPA contest website](http://www.dpacontest.org/v4/42_traces.php) on 2020-11-26.

In [None]:
# Layout of the WAVEDESC header block.
#
# Maps a byte offset (relative to the start of the WAVEDESC block) to a
# ``(field name, Python type)`` pair. The entries must stay in ascending
# offset order: parse_header reads each field from its own offset up to the
# NEXT offset listed here. An ``("END", None)`` entry therefore only serves to
# terminate the preceding field where the following bytes are not described;
# without it the field would swallow everything up to the next known offset.
HEADER_FIELDS = {
    0: ("Descriptor Name", str),
    16: ("Template Name", str),
    32: ("Comm Type", int),
    34: ("Comm Order", int),
    36: ("Wave Descriptor", int),
    40: ("User Text", int),
    44: ("Res Desc1", int),
    48: ("TrigTime Array", int),
    52: ("Ris Time Array", int),
    56: ("Res Array 1", int),
    60: ("Wave Array 1", int),
    64: ("Wave Array 2", int),
    # "Wave Array 2" is 32-bit; stop it here instead of at offset 76.
    (64 + 4): ("END", None),
    76: ("Instrument Name", str),
    116: ("Wave Array Count", int),
    (116 + 4): ("END", None),
    124: ("First Valid Point", int),
    128: ("Last Valid Point", int),
    (128 + 4): ("END", None),
    156: ("Vertical gain", float),
    160: ("Vertical offset", float),
    # "Vertical offset" is a 32-bit float; stop it here instead of at 172.
    (160 + 4): ("END", None),
    172: ("Nominal Bits", int),
    (172 + 2): ("END", None),
}

In [None]:
# Byte offset at which the WAVEDESC header block begins inside a raw trace
# file. parse_header and get_trace add it to every header field offset, and
# get_trace asserts that the "WAVEDESC" signature is found at this position.
START = 11

In [None]:
def parse_header(trace, start=START):
    """Print every decodable WAVEDESC header field of a raw trace.

    Walks HEADER_FIELDS in insertion order. Each field is read from its own
    offset up to the next offset listed in HEADER_FIELDS, decoded according
    to its declared type and printed as ``<name> :\t <value>``. Entries whose
    type is ``None`` (the "END" markers) only delimit the preceding field and
    print nothing.

    Args:
        trace: Raw (decompressed) trace file contents, bytes-like.
        start: Offset of the WAVEDESC block within ``trace``.

    Raises:
        UnicodeDecodeError: If a string field holds non-ASCII bytes before
            its null terminator.
    """
    import struct  # only needed here, for the 32-bit float fields

    offsets = list(HEADER_FIELDS)
    for offset, next_offset in zip(offsets, offsets[1:]):
        f_name, f_type = HEADER_FIELDS[offset]
        header_part = trace[offset + start:next_offset + start]

        if f_type is str:
            # Null-terminated string: keep only what precedes the first NUL,
            # so leftover bytes later in the slot are neither printed nor
            # able to break the ASCII decoding.
            text = header_part.split(b'\x00', 1)[0]
            print(f_name, ":\t", text.decode('ascii'))
        elif f_type is int:
            print(f_name, ":\t", int.from_bytes(header_part, "little", signed=True))
        elif f_type is float:
            # 32-bit IEEE float, little-endian like the integer fields. Only
            # the first four bytes belong to the value even if the slot up to
            # the next listed offset is wider.
            raw = bytes(header_part[:4])
            value = struct.unpack("<f", raw)[0] if len(raw) == 4 else None
            print(f_name, ":\t", value)

parse_header(EXAMPLE_TRACE)

In [None]:
# Number of consecutive trace indices stored in each "kNN" sub-directory.
SPLIT = 5000
def get_filename(ix):
    """Return the path of the bz2-compressed trace file with index ``ix``.

    The directory number is ``floor(ix / SPLIT)`` zero-padded to two digits;
    the trace index itself is zero-padded to six digits.
    """
    folder = str(math.floor(ix / SPLIT)).zfill(2)
    stem = str(ix).zfill(6)
    return f"{TMP_DIR}/DPA_contestv4_2/k{folder}/DPACV42_{stem}.trc.bz2"

def get_trace_from_bz2(ix):
    """Decompress trace file ``ix`` and return its full contents as a bytearray."""
    with bz2.open(get_filename(ix), 'rb') as compressed:
        return bytearray(compressed.read())

In [None]:
def get_trace(trace, start=START):
    """Return the sample bytes of a raw trace, without header and trailer.

    The header is validated first: the "WAVEDESC" signature must sit at
    ``start``, the first/last valid point fields must describe exactly
    TRACE_SIZE points, and the file must end with the two 0xEE filler bytes.
    The samples are then taken as the bytes directly in front of that filler.

    Args:
        trace: Raw (decompressed) trace file contents, bytes-like.
        start: Offset of the WAVEDESC block within ``trace``.

    Returns:
        A slice of ``trace`` holding one byte per valid sample point.

    Raises:
        AssertionError: If any of the layout checks fails.
    """
    # Check that the trace has the expected start offset. Compare raw bytes so
    # that a wrong offset fails this check instead of the ASCII decoding.
    assert trace[start:start + 16].rstrip(b'\x00') == b"WAVEDESC"

    p_first = int.from_bytes(trace[124 + start:128 + start], "little", signed=True)
    p_last = int.from_bytes(trace[128 + start:132 + start], "little", signed=True)
    # Both indices are inclusive, hence the + 1.
    n_points = p_last - p_first + 1

    # Check that the trace has expected size.
    assert n_points == TRACE_SIZE
    # Check that the trace ends with expected junk bytes.
    assert trace[-2:] == b'\xee\xee'

    # Take all n_points samples in front of the 2 junk bytes. Using p_last
    # alone as the length would return one byte too few and silently drop
    # the first sample.
    samples = trace[-(n_points + 2):-2]
    # Guards against a file too short to hold the announced number of points.
    assert len(samples) == n_points

    return samples

# The header declares the samples as 8-bit *signed* values (Comm Type 0).
# Iterating the bytearray would yield unsigned 0..255, so reinterpret the
# buffer as int8 instead; this also avoids a per-byte Python loop.
np.frombuffer(get_trace(EXAMPLE_TRACE), dtype=np.int8)

In [None]:
import seaborn as sns

# Preview the first 1000 samples. Slice before converting so only those bytes
# are decoded, and read them as int8 because the header declares the samples
# as 8-bit signed values (Comm Type 0).
sns.lineplot(data=np.frombuffer(get_trace(EXAMPLE_TRACE)[:1000], dtype=np.int8))

In [3]:
### DPA header description

# 0 	Descriptor Name 	Null terminated string  The first 8 chars are always "WAVEDESC" 	        WAVEDESC
# 16 	Template Name       Null terminated string  LECROY_2_3
# 32 	Comm Type 	        16-bit data 	        Format of data samples                              (0: byte (8-bit signed values), 1: word (16-bit signed values)) 	0 (8-bit signed values)
# 34 	Comm Order 	        16-bit data 	        Format of data samples                              (0: MSB first, 1: LSB first) 	1 (LSB first)
# 36 	Wave Descriptor 	32-bit signed data 	    Length in bytes of the block WAVEDESC 	            346
# 40 	User Text 	        32-bit signed data 	    Length in bytes of the block USERTEXT 	            0
# 44 	Res Desc1       	32-bit signed data 	    Length in bytes of the block RES_DESC1 	            0
# 48 	TrigTime Array  	32-bit signed data 	    Length in bytes of the TRIGTIME array 	            0
# 52 	Ris Time Array  	32-bit signed data 	    Length in bytes of the RIS_TIME array 	            0
# 56 	Res Array 1 	    32-bit signed data 		                                                    0
# 60 	Wave Array 1 	    32-bit signed data 	    Length in bytes of the 1st data array 	            1,704,402
# 64 	Wave Array 2 	    32-bit signed data 	    Length in bytes of the 2nd data array 	            0
# 76 	Instrument Name 	Null terminated string  Name of the instrument 	                            LECROYWR6100A
# 116 	Wave Array Count 	32-bit signed data  	Number of data points (samples) in the data array 	1,704,402
# 124 	First Valid Point 	32-bit signed data 	    Number of points to skip before first good point 	0
# 128 	Last Valid Point 	32-bit signed data 	    Index of last good data point 	                    1,704,401
# 156 	Vertical gain 	    float                   (32-bit IEEE floating point value) 	                Vertical gain
# 160 	Vertical offset 	float                   (32-bit IEEE floating point value) 	                Vertical offset
# 172 	Nominal Bits 	    16-bit signed data 	    Intrinsic precision of the observation 	            8 bits
