In [28]:
import h5py  as hdf
import numpy as np
import pandas as pd

### Files

***

Creating / reading a file

* `r`: read-only, file must exist
* `r+`: read/write, file must exist
* `w`: create a new file, truncate if it already exists
* `w-` or `x`: create a new file, fail if it exists
* `a`: read/write if it exists, create otherwise

In [141]:
f = hdf.File("test.hdf5", "a")
f

<HDF5 file "test.hdf5" (mode r+)>

File is being updated and saved in real time for as long as it's open!

### Groups

***
Creating a group in the file

* Groups are like dictionaries: keys are group names, and values are subgroups or datasets
* `File` is the `root group`

In [142]:
f.create_group("one", )
f.keys()

ValueError: Unable to create group (name already exists)

In [143]:
f.create_group("two/sub_two")
f.keys()

ValueError: Unable to create group (name already exists)

In [148]:
f.attrs["name"] = "Sasha"
f.attrs["age"] = 23
for key, val in f.attrs.items():
    print(key, val)

age 23
name Sasha


In [None]:
f.c

Groups/datasets can be deleted:

In [None]:
del f["one"]
f.keys()

### Datasets
***

Contain the actual data, and are the end of each file -> group branch.

To create a dataset, use `create_dataset` with a path and one of two things
* Input data
* Dataset shape and/or dtype

In [None]:
# f.create_dataset("two/two_data", dtype="f")
# f["two/two_data"]

In [None]:
del f["two/two_data"]

Print all level-1 groups/datasets: dataset `raw` and group `one`

In [None]:
for name in f:
    print(name)

Assign attributes to the dataset

In [None]:
f["raw"].attrs['zp'] = 0

In [None]:
f["one"].attrs['count'] = 1

In [None]:
f["one"].attrs

In [None]:
f.close()

### Dataframes

In [4]:
f.keys()

<KeysViewHDF5 ['two']>

In [5]:
temp = pd.read_csv("../../data/catalogs/SPOGs.csv")

In [27]:
f.attrs["name"] = 1
"name2" in f.attrs

False

In [20]:
f.create_dataset("three/meta2", data=temp)

TypeError: Object dtype dtype('O') has no native HDF5 equivalent

In [8]:
temp.to_hdf("test.hdf5", "three/meta")

In [16]:
df = pd.read_hdf("test.hdf5", "three/meta")
df

Unnamed: 0,no,pure,elg,ossy_id,name,objID,ra,dec,specObjID,Index,...,z,M_r,l(Mstar),Ha,Hbeta,[OIII],[NII],[SII],[OI],size
0,1,210,32153,518807,J0003+0048,587731187278020738,0.825886,0.812308,109153259184193536,1,...,16.72,-21.87,10.62,2.29,1.32,1.90,2.23,1.78,1.32,25
1,2,873,132169,295927,J0004-0114,588015507659227287,1.133002,-1.236577,109434741387689984,2,...,16.39,-21.33,10.47,2.29,1.14,1.97,2.27,1.90,1.55,25
2,3,83,14095,493360,J0010-1043,587727178450600065,2.614137,-10.728266,183464154114293760,3,...,16.46,-22.51,10.78,2.03,1.49,2.30,2.12,1.62,1.19,25
3,4,202,31389,96561,J0011-0054,587731185131454631,2.938412,-0.908533,109434740251033600,4,...,15.63,-20.87,10.13,2.60,1.45,1.93,2.82,2.50,2.18,25
4,5,196,29503,266427,J0015+1411,587730773353562270,3.985450,14.197489,211893599065866240,5,...,16.68,-20.75,10.23,2.02,1.17,1.31,1.53,1.40,0.86,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062,1063,84,14601,332624,J2331-0934,587727179520082044,352.752378,-9.571255,182058421841821696,1063,...,16.73,-21.10,10.24,2.54,1.83,1.75,2.25,1.86,1.44,25
1063,1064,204,31533,203714,J2338-0034,587731185664721050,354.659078,-0.572637,108590677146730496,1064,...,16.67,-20.48,10.10,1.93,1.12,1.07,1.57,1.41,0.96,25
1064,1065,200,29822,278972,J2341+1454,587730774423699614,355.495464,14.908406,210486153403432960,1065,...,16.05,-21.57,10.45,2.02,1.14,2.04,2.37,1.76,1.46,25
1065,1066,91,16038,421060,J2348+1421,587727221412331652,357.125820,14.359572,210767621887361024,1066,...,16.47,-21.97,10.61,2.30,1.46,1.90,2.19,1.72,1.37,25


# PyTables

In [128]:
import tables as tab

In [129]:
f = tab.open_file("test.h5", mode="a", title="Galaxy")

In [140]:
f.root._v_attrs["dec"] = 1
f.root._v_attrs

/._v_attrs (AttributeSet), 6 attributes:
   [CLASS := 'GROUP',
    PYTABLES_FORMAT_VERSION := '2.1',
    TITLE := 'Galaxy',
    VERSION := '1.0',
    dec := 1,
    ra := 1]

Create group

* where
* name
* title
* `createparents=False`: whether to create parent groups needed for path to exist

In [101]:
f.create_group("/", "detector", "detector dataset")

NodeError: group ``/`` already has a child node named ``detector``

Create a table

* where
* name
* description: columns of the table
* title
* createparents = False

description can be set by a class with an `IsDscription` argument where the class defines table columns

In [102]:
class Particle(tab.IsDescription):
    name    = tab.StringCol(16)
    energy  = tab.Float64Col()

In [109]:
table = f.create_table("/detector", "readout", Particle, "particle readout")

NodeError: group ``/detector`` already has a child node named ``readout``

Populate the table with rows (different particles)

In [110]:
particle = table.row
for i in range(20):
    particle["name"] = f"particle{i}"
    particle["energy"] = i
    particle.append()

Flush all the data to file

In [111]:
table.flush()

Access the table again

In [112]:
pd.read_hdf("test.h5", key="detector/readout", mode="a").head(3)

Unnamed: 0,energy,name
0,0.0,particle0
1,1.0,particle1
2,2.0,particle2


Remove table

In [98]:
f.remove_node("/detector/readout")

In [115]:
f.root.detector.readout.attrs.name = "test"
f.root.detector.readout.attrs

/detector/readout._v_attrs (AttributeSet), 9 attributes:
   [CLASS := 'TABLE',
    FIELD_0_FILL := 0.0,
    FIELD_0_NAME := 'energy',
    FIELD_1_FILL := b'',
    FIELD_1_NAME := 'name',
    NROWS := 0,
    TITLE := 'particle readout',
    VERSION := '2.7',
    name := 'test']

In [117]:
f.get_node("/detector/readout")

/detector/readout (Table(20,)) 'particle readout'
  description := {
  "energy": Float64Col(shape=(), dflt=0.0, pos=0),
  "name": StringCol(itemsize=16, shape=(), dflt=b'', pos=1)}
  byteorder := 'little'
  chunkshape := (2730,)