In [1]:
# render matplotlib figures inline in the notebook output
%matplotlib inline

In [2]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Processing hdf5 data

This notebook provides examples of processing HDF5 data with the `e11` package.

In [3]:
from e11 import H5Data, add_column_index
from e11.process import vrange, statistics

In [4]:
# read file
# The example data is located relative to the current working directory, so
# the notebook must be run from the folder that contains `example_data`.
# (`os` is imported with the other modules in the import cell at the top.)
fil = os.path.join(os.getcwd(), 'example_data', 'array_data.h5')
h5 = H5Data(fil)

100%|███████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 599.63it/s]


In [5]:
# display the log table (indexed by squid)
h5.log

Unnamed: 0_level_0,ACQUIRE,DATETIME,END,ERROR,LOOP,OVERRIDE,START,VAR:PAUSE,ELAPSED
squid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,10.036975,2017-12-01 15:41:22,3594988000.0,0,1,0,3594988000.0,300,00:00:00
2,10.021719,2017-12-01 15:41:33,3594988000.0,0,1,0,3594988000.0,600,00:00:11
3,10.034403,2017-12-01 15:41:45,3594988000.0,0,1,0,3594988000.0,900,00:00:23
4,10.022052,2017-12-01 15:41:55,3594988000.0,0,2,0,3594988000.0,300,00:00:33
5,10.027435,2017-12-01 15:42:06,3594988000.0,0,2,0,3594988000.0,600,00:00:44
6,10.027131,2017-12-01 15:42:18,3594988000.0,0,2,0,3594988000.0,900,00:00:56


## Vrange

Here we apply the `vrange` function to measure the vertical range of the array data in the `OSC_0` dataset.

In [6]:
# measure the vertical range of the 'OSC_0' dataset;
# info=True additionally returns a dict describing the processing
vr, info = vrange(h5, 'OSC_0', info=True)
# preview the first rows (indexed by squid and repeat)
vr.head()

100%|███████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 206.77sq/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,vrange
squid,repeat,Unnamed: 2_level_1
1,0,1.033028
1,1,1.012191
1,2,1.032403
1,3,1.070328
1,4,1.062143


In [7]:
# information about the processing: the dataset name, a timestamp,
# the name of the process that was applied and the array of squids
info

{'dataset': 'OSC_0',
 'datetime': '2018-01-10 13:00:04',
 'process': 'process.vrange()',
 'squids': array([1, 2, 3, 4, 5, 6])}

Next, we use the `statistics` function to find the average `vrange` for each squid.

In [8]:
# per-squid summary of vrange; the default output has count, err, mean and std
# NOTE(review): err looks like std / sqrt(count) in the output below -- confirm
av = statistics(vr)
av.head()

Unnamed: 0_level_0,vrange,vrange,vrange,vrange
Unnamed: 0_level_1,count,err,mean,std
squid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,25,0.00335,1.038482,0.016752
2,25,0.002847,1.039364,0.014236
3,26,0.001758,1.043457,0.008966
4,25,0.002988,1.04429,0.014941
5,25,0.003784,1.034683,0.018921


We can see from `h5.log` that some measurements were repeated with the same VAR conditions (each `VAR:PAUSE` value appears once in every loop). We can group these repeats together and compute the statistics for each unique VAR value.

In [9]:
# map measurements to var values
# (presumably joined on the shared squid index -- TODO confirm)
df = h5.var.join(vr)
# evaluate the statistics when grouped by var values;
# mode='full' adds max, median, min and range to the default columns
st = statistics(df, groupby=h5.var.columns, mode='full')
st.head()

Unnamed: 0_level_0,vrange,vrange,vrange,vrange,vrange,vrange,vrange,vrange
Unnamed: 0_level_1,count,err,max,mean,median,min,range,std
PAUSE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
300,50,0.00226,1.083854,1.041386,1.040635,1.000539,0.083316,0.015981
600,50,0.002367,1.06408,1.037024,1.042134,0.997321,0.066759,0.016739
900,51,0.001702,1.069203,1.040724,1.041619,1.012878,0.056325,0.012155
