# Timing Alignment for OPM-MEG

We'll need to read in some LVM (LabVIEW Measurement) files: first the per-channel header block, then the sample data.

In [1]:
from dfply import *
import pandas as pd
import numpy as np
from pathlib import Path
from timer import Timer

# Locate the recording relative to the project root, which is taken to be
# three directory levels above the notebook's working directory.
cwd = Path('.').resolve()
proj_root = cwd.parent.parent.parent

lvm = proj_root.joinpath('nottingham', '10760_91', 'QZFM_1.lvm')
hdf = lvm.with_suffix('.h5')  # same path as the LVM file, with an .h5 suffix

# Read only the header portion: skip the first 14 lines of the file and
# parse the next 7 as tab-separated rows without a column-name row.
hdr_read_opts = dict(sep='\t', skiprows=14, nrows=7, header=None)
with Timer('read hdr'):
    hdr = pd.read_csv(lvm, **hdr_read_opts)

# Turn the raw header rows into a table with one row per channel.
with Timer('dply hdr'):
    seconds = pd.Timedelta(seconds=1.0)

    @make_symbolic
    def as_int(series):
        """Cast every value of ``series`` to ``int``."""
        return series.astype(int)

    @make_symbolic
    def as_string(series, format_string='{}'):
        """Render each value of ``series`` through ``format_string.format``."""
        render = format_string.format
        return series.map(render)

    @make_symbolic
    def combine_date_time(date_s, time_s):
        """Shift ``time_s`` by the gap between ``date_s`` and ``time_s``'s own midnight.

        In effect the time of day carried by ``time_s`` is moved onto the
        calendar date carried by ``date_s``.
        """
        midnight = time_s.dt.normalize()
        return time_s + (date_s - midnight)

    # Stage 1: pivot the header so that the values of column 0 become the
    # column names and every remaining input column becomes one row, then
    # keep only the rows that report at least one sample.
    per_channel = (
        hdr >>
        gather('channel', 'value', columns_from(1)) >>
        spread(0, X.value, convert=True) >>
        mask(X.Samples > 0)
    )

    # Stage 2: derived columns. Each mutate stays a separate step, in the
    # original order, because `name` is built from the re-based `channel`.
    derived = (
        per_channel >>
        mutate(channel=X.channel - colmin(X.channel)) >>            # zero-based channel number
        mutate(offset=(X.Time - colmin(X.Time)) / seconds) >>       # seconds after the earliest Time
        mutate(start=combine_date_time(X.Date, X.Time)) >>          # full start timestamp
        mutate(name=as_string(X.channel, format_string='ch{:03d}')) >>  # placeholder name, e.g. ch000
        mutate(Samples=as_int(X.Samples))
    )

    # Stage 3: keep the columns of interest, ordered by channel number.
    header = (
        derived >>
        select(X.channel, X.name, X.offset, X.start, X.Samples, X.Y_Unit_Label, X.X_Dimension, X.X0, X.Delta_X) >>
        arrange(X.channel)
    )

header.head(3)

[read hdr] Elapsed: 0.016103744506835938
[dply hdr] Elapsed: 0.06752705574035645


Unnamed: 0,channel,name,offset,start,Samples,Y_Unit_Label,X_Dimension,X0,Delta_X
0,0,ch000,0.0,2018-05-09 12:02:25.693492,1200,Volts,Time,0.0,0.000833
1,1,ch001,1.4e-05,2018-05-09 12:02:25.693506,1200,Volts,Time,0.0,0.000833
2,2,ch002,2.8e-05,2018-05-09 12:02:25.693520,1200,Volts,Time,0.0,0.000833


In [2]:
# Load the sample table (n_obvs x n_chan). The first 22 lines of the file
# are skipped, so the first line parsed supplies the column names.
dat_read_opts = dict(sep='\t', skiprows=22)
with Timer('read dat'):
    dat = pd.read_csv(lvm, **dat_read_opts)

dat.head(3)

[read dat] Elapsed: 16.84319496154785


Unnamed: 0,X_Value,cDAQ1Mod1/ai0,cDAQ1Mod1/ai1,cDAQ1Mod1/ai2,cDAQ1Mod1/ai3,cDAQ1Mod1/ai4,cDAQ1Mod1/ai5,cDAQ1Mod1/ai6,cDAQ1Mod1/ai7,cDAQ1Mod1/ai8,cDAQ1Mod1/ai9,cDAQ1Mod1/ai10,cDAQ1Mod1/ai11,cDAQ1Mod1/ai12,cDAQ1Mod1/ai13,cDAQ1Mod1/ai14,cDAQ1Mod1/ai15,cDAQ1Mod1/ai16,cDAQ1Mod1/ai17,cDAQ1Mod1/ai18,cDAQ1Mod1/ai19,cDAQ1Mod1/ai20,cDAQ1Mod1/ai21,cDAQ1Mod1/ai22,cDAQ1Mod1/ai23,cDAQ1Mod1/ai24,cDAQ1Mod1/ai25,cDAQ1Mod1/ai26,cDAQ1Mod1/ai27,cDAQ1Mod1/ai28,cDAQ1Mod1/ai29,cDAQ1Mod1/ai30,cDAQ1Mod1/ai31,cDAQ1Mod3/ai0,cDAQ1Mod3/ai1,cDAQ1Mod3/ai2,cDAQ1Mod3/ai3,cDAQ1Mod3/ai4,cDAQ1Mod3/ai5,cDAQ1Mod3/ai6,...,cDAQ1Mod3/ai9,cDAQ1Mod3/ai10,cDAQ1Mod3/ai11,cDAQ1Mod3/ai12,cDAQ1Mod3/ai13,cDAQ1Mod3/ai14,cDAQ1Mod3/ai15,cDAQ1Mod3/ai16,cDAQ1Mod3/ai17,cDAQ1Mod3/ai18,cDAQ1Mod3/ai19,cDAQ1Mod3/ai20,cDAQ1Mod3/ai21,cDAQ1Mod3/ai22,cDAQ1Mod3/ai23,cDAQ1Mod3/ai24,cDAQ1Mod3/ai25,cDAQ1Mod3/ai26,cDAQ1Mod3/ai27,cDAQ1Mod3/ai28,cDAQ1Mod3/ai29,cDAQ1Mod3/ai30,cDAQ1Mod3/ai31,cDAQ1Mod5/ai0,cDAQ1Mod5/ai1,cDAQ1Mod5/ai2,cDAQ1Mod5/ai3,cDAQ1Mod5/ai4,cDAQ1Mod5/ai5,cDAQ1Mod5/ai6,cDAQ1Mod5/ai7,cDAQ1Mod8/ai0,cDAQ1Mod8/ai1,cDAQ1Mod8/ai2,cDAQ1Mod8/ai3,cDAQ1Mod8/ai4,cDAQ1Mod8/ai5,cDAQ1Mod8/ai6,cDAQ1Mod8/ai7,Comment
0,0.0,4.741264,-0.581624,4.771811,-0.088813,4.815825,0.542597,4.743727,-0.233816,4.236259,0.240768,4.74422,-0.344169,4.232317,0.055696,0.042888,-0.010647,4.274688,-0.340228,4.26894,0.085419,-0.105892,0.270983,0.521742,0.380187,0.270491,0.269177,0.496288,0.54818,-0.686065,0.442754,4.235273,0.250949,4.252752,0.102282,4.224676,-0.231997,4.246841,0.098999,4.228616,...,0.007055,4.249797,0.162538,4.238632,0.219346,4.246513,-0.171741,4.262275,-0.621277,4.219093,-0.135621,-0.097694,-0.337567,-0.288969,-0.317208,4.267858,1.02188,4.207928,0.064684,4.291337,-0.366135,4.246185,0.067968,-0.600543,-0.580848,-0.604317,-0.586756,4.79832,0.040194,4.791426,-0.577073,-0.000189,-0.002818,5.052748,0.005562,0.000468,-0.000518,-0.002161,4.849486,
1,0.000833,4.740443,-0.563396,4.770661,-0.062539,4.815168,0.524862,4.744548,-0.250073,4.237572,0.230751,4.744713,-0.349752,4.232646,0.097079,0.109559,0.059473,4.275016,-0.332181,4.268776,0.096093,-0.106877,0.282971,0.520756,0.379038,0.281329,0.280508,0.501707,0.502693,-0.68311,0.445545,4.236587,0.266714,4.252588,0.089969,4.225168,-0.246609,4.247334,0.104417,4.228452,...,0.001473,4.25111,0.165986,4.239617,0.227555,4.247005,-0.16895,4.26129,-0.60568,4.220243,-0.122978,-0.087022,-0.322791,-0.284207,-0.313596,4.268514,1.037642,4.207764,0.083401,4.289859,-0.353329,4.245692,0.084386,-0.599066,-0.578714,-0.602184,-0.585443,4.79832,0.041343,4.791754,-0.572313,0.000632,0.000797,5.050283,-0.000189,0.003261,0.00589,0.003754,4.847843,
2,0.001667,4.742085,-0.543526,4.771482,0.034677,4.813854,0.509097,4.742906,-0.273885,4.237572,0.217449,4.74307,-0.368144,4.232646,-0.022142,0.052741,0.018912,4.276002,-0.347125,4.268776,0.081478,-0.093576,0.285599,0.520756,0.374768,0.272133,0.27279,0.498259,0.543582,-0.67884,0.445545,4.235273,0.272626,4.252916,0.079132,4.224347,-0.275013,4.247005,0.097193,4.22796,...,-0.013796,4.251767,0.139224,4.239945,0.209823,4.246841,-0.186682,4.261947,-0.62883,4.219586,-0.148099,-0.103276,-0.345448,-0.288804,-0.316223,4.268514,1.022537,4.209242,0.067804,4.291173,-0.371717,4.244543,0.072237,-0.599722,-0.57855,-0.602512,-0.5861,4.798156,0.02214,4.790933,-0.593485,0.006712,-0.000189,5.052912,-0.008405,0.00014,-0.00364,-0.000846,4.848993,


In [3]:
# now, let's replace the dummy name we created in the
# header above with the actual channel name from the 
# column names
# NOTE(review): this overwrites a column of the `header` frame built in an
# earlier cell, and the names are taken in column order -- it presumably
# relies on the rows of `header` being in the same order as the data
# columns (minus X_Value and Comment); confirm.
header['name'] = dat.columns.drop(['X_Value', 'Comment'])

# finally, we reshape the data so that each sample is in a single row
with Timer('dply dat'):
    # smaller dataframe cuts down on memory and processing
    # (only the three columns needed for the join below are kept)
    channels = (
        header >>
        select(X.channel, X.name, X.offset)
    )
    
    # here's where we re-arrange
    # Steps, in order:
    #   1. number the rows by X_Value as `frame` and cast it to int with
    #      as_int (defined in the header cell)
    #   2. drop the Comment column
    #   3. gather every column whose name starts with 'cDAQ' into
    #      (name, Y_Value) pairs, one row per sample per channel
    #   4. join the per-channel attributes on `name` to attach the
    #      channel number and offset
    #   5. add the channel offset to X_Value to get each sample's `time`
    #   6. keep only channel, frame, time and Y_Value
    data = (
        dat >>
        mutate(frame=row_number(X.X_Value)) >>
        mutate(frame=as_int(X.frame)) >>
        drop(X.Comment) >>
        gather('name', 'Y_Value', starts_with('cDAQ')) >>
        inner_join(channels, by='name') >>
        mutate(time=X.X_Value + X.offset) >>
        select(X.channel, X.frame, X.time, X.Y_Value)
    )
    
data.head(3)

[dply dat] Elapsed: 54.920976877212524


Unnamed: 0,channel,frame,time,Y_Value
0,0,1,0.0,4.741264
1,0,2,0.000833,4.740443
2,0,3,0.001667,4.742085


In [22]:
# Write the results to a single HDF5 file: the channel table under the key
# 'header', followed by one table per channel under 'data/chNNN'.

# Storage options shared by every table written below.
hdf_table_opts = dict(
    format='table',
    complib='zlib',
    complevel=9,
    data_columns=True,
    index=False,
)

with Timer('write hdr hdf'):
    # mode='w' starts a fresh file, so re-running this cell replaces any
    # previous output instead of appending to it.
    header.to_hdf(hdf, mode='w', key='header', **hdf_table_opts)

with Timer('write dat hdf'):
    # Split the long-format frame by channel in ONE pass. Filtering with
    # mask(X.channel == chan) inside the loop rescans every row of `data`
    # once per channel, which scales with n_channels * n_rows.
    # Groups come back in ascending channel order and each group keeps the
    # `channel` column, so the tables written are the same as before.
    # A channel with no rows in `data` yields no group and is skipped.
    for chan, chan_rows in data.groupby('channel', sort=True):
        ch = chan_rows >> arrange(X.frame)

        with Timer(f'write ch{chan:03d} hdf'):
            # mode='r+' adds to the file created by the header write above.
            ch.to_hdf(
                hdf,
                mode='r+',
                key=f'data/ch{chan:03d}',
                **hdf_table_opts
            )



#with Timer('write dat tsv'):
#    data.to_csv(
#        data_tsv, 
#        sep='\t', 
#        index=False, 
#        float_format='%.6f',
#        compression='gzip',
#        chunksize=2e7)
# ~ 730s



[write hdr hdf] Elapsed: 0.05467486381530762
[write ch000 hdf] Elapsed: 0.677130937576294
[write ch001 hdf] Elapsed: 0.5959851741790771
[write ch002 hdf] Elapsed: 0.7470848560333252
[write ch003 hdf] Elapsed: 1.2251698970794678
[write ch004 hdf] Elapsed: 0.7225778102874756
[write ch005 hdf] Elapsed: 0.6593310832977295
[write ch006 hdf] Elapsed: 0.8199498653411865
[write ch007 hdf] Elapsed: 0.6341159343719482
[write ch008 hdf] Elapsed: 0.9357109069824219
[write ch009 hdf] Elapsed: 0.625452995300293
[write ch010 hdf] Elapsed: 0.8002438545227051
[write ch011 hdf] Elapsed: 0.6498339176177979
[write ch012 hdf] Elapsed: 0.9236042499542236
[write ch013 hdf] Elapsed: 1.4237380027770996
[write ch014 hdf] Elapsed: 1.4018566608428955
[write ch015 hdf] Elapsed: 1.2737140655517578
[write ch016 hdf] Elapsed: 0.9529590606689453
[write ch017 hdf] Elapsed: 0.6866259574890137
[write ch018 hdf] Elapsed: 0.9000749588012695
[write ch019 hdf] Elapsed: 0.6825838088989258
[write ch020 hdf] Elapsed: 0.71617007