In [1]:
import h5py

In [3]:
# Open the HDF5 file in read-only mode ('r'), much like how you open a file using open(file, mode).
# Other modes: 'r+' opens an existing file for read/write; 'w' creates a new file
# and truncates an existing one, so do not use 'w' on data you want to keep.
# More information about the modes can be found on this page:
# http://docs.h5py.org/en/stable/high/file.html
dataset = h5py.File('dataset.h5', 'r')

In [4]:
# The file object behaves like a Python dictionary with the data stored in it.
# Its keys are the groups I created.
dataset.keys()

[u'test', u'train', u'validate']

In [5]:
# To open a given group, say 'train', index the file object by the group name.
group = dataset['train']

In [6]:
# The group again behaves like a Python dictionary.
# Listing the keys in this group: each key is a unique identifier
# for the data recorded by one detector.
group.keys()

[u'0.1',
 u'0.10',
 u'0.100',
 u'0.1001',
 u'0.1002',
 u'0.1004',
 u'0.1005',
 u'0.1006',
 u'0.1008',
 u'0.1009',
 u'0.101',
 u'0.1010',
 u'0.1013',
 u'0.1014',
 u'0.1016',
 u'0.1017',
 u'0.1018',
 u'0.1019',
 u'0.102',
 u'0.1021',
 u'0.1023',
 u'0.103',
 u'0.104',
 u'0.105',
 u'0.106',
 u'0.109',
 u'0.110',
 u'0.111',
 u'0.113',
 u'0.114',
 u'0.115',
 u'0.116',
 u'0.117',
 u'0.118',
 u'0.119',
 u'0.120',
 u'0.121',
 u'0.122',
 u'0.123',
 u'0.124',
 u'0.125',
 u'0.126',
 u'0.127',
 u'0.128',
 u'0.129',
 u'0.13',
 u'0.131',
 u'0.132',
 u'0.133',
 u'0.134',
 u'0.135',
 u'0.136',
 u'0.137',
 u'0.138',
 u'0.14',
 u'0.141',
 u'0.143',
 u'0.145',
 u'0.146',
 u'0.147',
 u'0.148',
 u'0.149',
 u'0.15',
 u'0.150',
 u'0.151',
 u'0.152',
 u'0.153',
 u'0.154',
 u'0.155',
 u'0.156',
 u'0.157',
 u'0.158',
 u'0.159',
 u'0.161',
 u'0.162',
 u'0.163',
 u'0.164',
 u'0.165',
 u'0.166',
 u'0.167',
 u'0.168',
 u'0.169',
 u'0.17',
 u'0.173',
 u'0.174',
 u'0.175',
 u'0.177',
 u'0.178',
 u'0.179',
 u'0.18',
 u

In [8]:
# As you can see, there are 50880 detectors' data stored in the training group.
# len(group) counts the members directly, without building the full key list.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(len(group))

50880


In [9]:
# Now we look at the data of a single detector. Pick any detector to look at,
# say the one corresponding to key '0.676'.
det_data = group['0.676']

In [10]:
# This is now an actual h5py dataset object.
type(det_data)

h5py._hl.dataset.Dataset

In [11]:
# To actually load the data from this detector dataset,
# simply slice it with [:].
data = det_data[:]

In [13]:
# Show the type and the contents of the loaded data.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(type(data))
print(data)

<type 'numpy.ndarray'>
[-1.12883792 -1.14458076 -1.15281669 ..., -1.18195189 -1.16803627
 -1.1477213 ]


In [15]:
# now you can see that it is loaded as a numpy array that you can use for furthur processing
# there are also metadata associated with each detector data, these are contained in a 
# dictionary under attrs. I only included the 10 parameters that Jesse suggested, but 
# it's easy to add more parameters as metadata. 
det_data.attrs.keys()

[u'corrLive',
 u'rmsLive',
 u'kurtLive',
 u'DELive',
 u'MFELive',
 u'skewLive',
 u'normLive',
 u'darkRatioLive',
 u'jumpLive',
 u'gainLive',
 u'label']

In [21]:
# To look at the value of a given parameter.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(det_data.attrs['jumpLive'])

1.67555e-12


In [22]:
# The 'label' attribute contains the 'sel' parameter, i.e. the truth:
# 0 means unselected (bad), 1 means selected (good).
det_data.attrs['label']

0

In [None]:
# To add a new parameter as an attribute you can do
# > det_data.attrs['your_cool_new_feature'] = somevalue
# Make sure you opened the file in read/write mode ('r+'), not 'w',
# which would truncate the existing file,
# and then make sure you close the file properly with
# > dataset.close()

In [None]:
# A sample workflow: load every detector's data in the training set, generate
# a new feature for each of them, and store it as metadata on that detector.
# 'r+' opens the existing file for read/write without truncating it.
# NOTE: if the file is still open read-only from an earlier cell, call
# dataset.close() first; HDF5 will typically refuse to reopen an already-open
# file in a different mode.
with h5py.File('dataset.h5', 'r+') as dataset:
    # suppose we are working on the train set
    grp = dataset['train']

    # iterating over a group yields the keys of its members
    for key in grp:
        # get the h5py dataset for this detector
        det_data = grp[key]

        # retrieve data as numpy array
        data = det_data[:]

        # do something with the data to generate your cool feature
        # (generate_your_cool_feature is a placeholder that is not defined in
        # the cells shown here; supply your own function)
        new_feature = generate_your_cool_feature(data)

        # save the new feature as metadata; attrs belong to the h5py dataset,
        # not to the numpy array that was loaded from it
        det_data.attrs['your_cool_feature_name'] = new_feature

# leaving the with-block saves and closes the file, even if the loop raised