In [1]:
# Going through the tutorial at the below URL: 
# https://www.christopherlovell.co.uk/blog/2016/04/27/h5py-intro.html

# Notes: 
# Just learning about command mode. Just press escape to get into it and then the key above gives all the 
# shortcuts. Pretty nice.


In [52]:
import numpy as np
import h5py

In [3]:
d1 = np.random.random(size = (1000,20))
d2 = np.random.random(size = (1000,200))  # Note that this means 1000 rows and 200 columns (rows x columns)

In [4]:
print( d1.shape, d2.shape)

(1000, 20) (1000, 200)


In [None]:
# WRITING an HDF

In [5]:
# Now we initialise the hdf file. The "w" denotes that we are writing to the file
hf = h5py.File('data.h5', 'w')

# And then we can use one of the methods associated with our hf object; create_dataset
hf.create_dataset('dataset_1', data=d1)
hf.create_dataset('dataset_2', data=d2)

# Note that running this cell multiple 

<HDF5 dataset "dataset_2": shape (1000, 200), type "<f8">

In [6]:
# And then we close the hdf file and it will write all of our work to disk. 
hf.close()

In [None]:
# READING an HDF

In [7]:
# Now we want to read the file we created: 

hf = h5py.File('data.h5', 'r')

# Then the keys() method gives us the type of hdf5 file we are looking at (what are the types?)
hf.keys()

<KeysViewHDF5 ['dataset_1', 'dataset_2']>

In [10]:
# Then we can grab the data with the get() method
n1 = hf.get('dataset_1')

In [12]:
# Convert this to an array: 
n1 = np.array(n1)
n1.shape

(1000, 20)

In [13]:
# Make sure to close it after you use the File() method
hf.close()

In [None]:
# GROUPS
# Groups are used to organize data within an HDF file. Much like a directory structure. 

In [67]:
# More fake data
d1 = np.random.random(size = (100,33))
d2 = np.random.random(size = (100,333))
d3 = np.random.random(size = (100,3333))

# Initailising an new hdf file 
hf1 = h5py.File('data1.h5', 'w')


In [68]:
# So then we make a group and can put the data into the groups just like a folder in a directory

g1 = hf1.create_group('group1')

g1.create_dataset('data1',data=d1)
g1.create_dataset('data2',data=d1)

<HDF5 dataset "data2": shape (100, 33), type "<f8">

In [69]:
# Subgroups, Using the / notation. Hmm. But does this make 2 groups or just 1? 

g2 = hf1.create_group('group2/subfolder')


In [70]:

g2.create_dataset('data3',data=d3)

<HDF5 dataset "data3": shape (100, 3333), type "<f8">

In [71]:
# Exploring: 

g4 = g1.create_group('group1/group3')
g4.create_dataset('data3',data=d3)
# OK. Hmm. How the directory structure works isn't clear to me. But you can create a group in a group object
# or within an hdf object. 

<HDF5 dataset "data3": shape (100, 3333), type "<f8">

In [72]:
stuff1 = g1.get('group1/group3')
# stuff2 = hf1.get('group1/group1/group3')

In [73]:
stuff1.items()
# stuff2.items()
# AHH. hard to follow but I believe using g1.create_group automatically adds group1/ to the front 
# of the name of your group. But why 

ItemsViewHDF5(<HDF5 group "/group1/group1/group3" (1 members)>)

In [74]:
stuff3 = hf1.get('group1/group1/group3')

In [75]:
print(stuff3)

<HDF5 group "/group1/group1/group3" (1 members)>


In [79]:
# Note: This is also helpful: http://docs.h5py.org/en/stable/quick.html
# Question: why isn't The items() method showing the data inside the group??

for name in hf1:
    print(name)

for name in g2:   # Not super clear on why subgroup doesn't show up here.. Only goes one level down?
    print(name)


data3


In [83]:
# You can reference the data with a directiory structure:

# if hf1['group1/group3/data3'] == 'data3':
#     print( 'yeahhhh')

print(hf1['group2/subfolder/data3'])

# Ok cool. So you can reference the dataset with this directory style. 

<HDF5 dataset "data3": shape (100, 3333), type "<f8">


In [84]:
hf1.close()


In [85]:
print(hf1['group2/subfolder/data3'])

# Now the same command won't work because I closed out the file hf1! 

ValueError: Not a location (invalid object ID)