# Timing Alignment for OPM-MEG

To align the timing, we first need to read in an LVM sensor recording: its per-channel header, and then the samples themselves.

In [3]:
from dfply import *
import pandas as pd
import numpy as np
from pathlib import Path
from hive.timer import Timer

# root of the project tree, made absolute
proj_root = Path('/data/hnl/opm-meg').resolve()

# the LVM recording to convert, and the HDF5 file that sits next to it
lvm = proj_root.joinpath(
    'incoming', 'sensor_data', 'Nottingham', '10760_91', 'QZFM_1.lvm'
)
hdf = lvm.with_suffix('.h5')

# first, we read in the header portion: 7 tab-separated rows that
# follow the first 14 lines of the file.  No row is treated as column
# names, so the columns are labelled 0, 1, 2, ...
with Timer('read hdr'):
    hdr = pd.read_csv(
        lvm,
        sep='\t',
        header=None,
        skiprows=14,
        nrows=7,
    )

# next, we re-shape the header to a table of channel attributes
with Timer('dply hdr'):
    # dividing a Timedelta by this gives a plain number of seconds
    seconds = pd.Timedelta(seconds=1.0)

    @make_symbolic
    def combine_date_time(date_s, time_s):
        """Combine the Date and Time columns into one timestamp column.

        The result is `time_s` shifted by the difference between `date_s`
        and the midnight-normalized `time_s`.
        """
        day_shift = date_s - time_s.dt.normalize()
        return time_s + day_shift

    @make_symbolic
    def as_string(series, format_string='{}'):
        """Format every element of `series` with `format_string`."""
        render = format_string.format
        return series.map(render)

    @make_symbolic
    def as_int(series):
        """Cast `series` to an integer dtype."""
        return series.astype(int)

    header = (
        hdr
        # melt every column after the first into (channel, value) pairs
        >> gather('channel', 'value', columns_from(1))
        # pivot so each attribute named in column 0 becomes its own column
        >> spread(0, X.value, convert=True)
        # keep only the channels that have samples
        >> mask(X.Samples > 0)
        # renumber the channels so that they start at zero
        >> mutate(channel=X.channel - colmin(X.channel))
        # seconds between each channel's Time and the earliest Time
        >> mutate(offset=(X.Time - colmin(X.Time)) / seconds)
        >> mutate(start=combine_date_time(X.Date, X.Time))
        # placeholder channel names: ch000, ch001, ...
        >> mutate(name=as_string(X.channel, format_string='ch{:03d}'))
        >> mutate(Samples=as_int(X.Samples))
        >> select(
            X.channel, X.name, X.offset, X.start, X.Samples,
            X.Y_Unit_Label, X.X_Dimension, X.X0, X.Delta_X,
        )
        >> arrange(X.channel)
    )

header.head(3)

read hdr: 0.067195 seconds
dply hdr: 0.074200 seconds


Unnamed: 0,channel,name,offset,start,Samples,Y_Unit_Label,X_Dimension,X0,Delta_X
0,0,ch000,0.0,2018-05-09 12:02:25.693492,1200,Volts,Time,0.0,0.000833
1,1,ch001,1.4e-05,2018-05-09 12:02:25.693506,1200,Volts,Time,0.0,0.000833
2,2,ch002,2.8e-05,2018-05-09 12:02:25.693520,1200,Volts,Time,0.0,0.000833


In [None]:
# next, we load in the actual data (n_obs x n_chan); the first line
# after the 22 skipped ones supplies the column names
with Timer('read dat'):
    dat = pd.read_csv(
        lvm,
        sep='\t',
        skiprows=22,
    )

dat.head(3)

In [None]:
# now, let's replace the dummy name we created in the
# header above with the actual channel name from the
# column names
# NOTE(review): the names are assigned by position, so this presumably
# relies on the header rows and the channel columns of dat being in the
# same order and equal in number -- confirm
header['name'] = dat.columns.drop(['X_Value', 'Comment'])

# finally, we reshape the data so that each sample is in a single row
with Timer('dply dat'):
    # smaller dataframe cuts down on memory and processing
    channels = header >> select(X.channel, X.name, X.offset)

    # here's where we re-arrange
    data = (
        dat
        # number the rows by X_Value, then store that number as an int
        >> mutate(frame=row_number(X.X_Value))
        >> mutate(frame=as_int(X.frame))
        >> drop(X.Comment)
        # long form: one row per (sample, channel column)
        >> gather('name', 'Y_Value', starts_with('cDAQ'))
        # look up each channel's number and offset by its column name
        >> inner_join(channels, by='name')
        # shift every sample by the offset of its channel
        >> mutate(time=X.X_Value + X.offset)
        >> select(X.channel, X.frame, X.time, X.Y_Value)
    )

data.head(3)

In [None]:
# now, we'll output the files

# storage options shared by every table written to the HDF5 file
hdf_opts = dict(
    format='table',
    complib='zlib',
    complevel=9,
    data_columns=True,
    index=False,
)

# mode='w' starts a fresh file, discarding whatever was there before
with Timer('write hdr hdf'):
    header.to_hdf(hdf, key='header', mode='w', **hdf_opts)

with Timer('write dat hdf'):
    # split the long table once, rather than re-filtering all of `data`
    # for every channel; the groups come out in ascending channel order
    for chan, ch in data.groupby('channel', sort=True):
        chan = int(chan)  # plain int for the {:03d} format below
        ch = ch >> arrange(X.frame)

        # mode='r+' adds this channel's table to the file created above
        with Timer(f'write ch{chan:03d} hdf'):
            ch.to_hdf(hdf, key=f'data/ch{chan:03d}', mode='r+', **hdf_opts)



#with Timer('write dat tsv'):
#    data.to_csv(
#        data_tsv, 
#        sep='\t', 
#        index=False, 
#        float_format='%.6f',
#        compression='gzip',
#        chunksize=2e7)
# ~ 730s (presumably the run time of the TSV export above -- TODO confirm)

