# Timing test for IO operation in ChiantiPy
### Author: Mark Cheung, cheung@lmsal.com
## Problem: ChiantiPy.tools.io.wgfaRead() is the main IO bottleneck. 
## Summary: The time it takes to initialize an ion (e.g. fe14) is dominated by the time spent in wgfaRead().
##          Saving the wgfa files in the HDF5 format (https://www.hdfgroup.org) results in a > 300x speedup in wgfa read time, and a ~ 20x speedup in ion setup time.

In [7]:
import os
import numpy as np
import glob
import h5py
os.environ['XUVTOP'] = '/Users/cheung/python/ChiantiPy-master/dbase'

In [8]:
import ChiantiPy
import ChiantiPy.core as ch
import ChiantiPy.tools.io as io
import ChiantiPy.tools.util as util

## Do some timing tests with IO routines

In [9]:
# Temperature grid: 21 values 10**x, with x running from 5.8 to 6.8 in steps of 0.05.
temp = 10.**(5.8 + 0.05*np.arange(21.))
# Single electron density value (units not stated here; presumably cm^-3 -- TODO confirm).
edens = 1.e+9

In [10]:
%%timeit
fe14 = ch.ion('fe_14', temperature=temp, eDensity=edens, hdf5=False)

3.12 s ± 39.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
fe14 = ch.ion('fe_14', temperature=temp, eDensity=edens, hdf5=True)

162 ms ± 1.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## How come the previous command (with hdf5=True) is so much faster?

In [None]:
%%timeit
io.elvlcRead('fe_14')
#This is not a bottleneck

In [None]:
%%timeit
io.scupsRead('fe_14')
#This is not a bottleneck

In [5]:
%%timeit
io.splupsRead('fe_14', filetype='psplups')
#This is not a bottleneck

921 µs ± 5.68 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [14]:
%%timeit
io.wgfaRead('fe_14')
#This is THE bottleneck

3.06 s ± 35.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
a = io.wgfaRead('fe_14')

### Let's save the output from wgfaRead() into a HDF5 file

In [17]:
def safe_str(obj):
    """Return `obj` encoded as UTF-8 bytes.

    Characters that cannot be encoded are silently dropped rather than
    raising an error.
    """
    encoded = obj.encode(encoding='utf-8', errors='ignore')
    return encoded

def wgfa2h5(a,filename):
    """Write a dictionary (as returned by wgfaRead()) to an HDF5 file.

    Every key of `a` becomes one dataset named after that key.

    Parameters
    ----------
    a : dict
        Maps dataset names to values. The values under 'ref', 'ionS',
        'filename', 'pretty1' and 'pretty2' are text and are stored as
        fixed-width byte strings; every other value is stored via
        ``np.array(value)``.
    filename : str
        Path of the HDF5 file to create. An existing file is overwritten
        (the file is opened with mode='w').

    Returns
    -------
    None
    """
    # Keys whose value is a single piece of text.
    string_keys = ('ionS', 'filename', 'pretty1', 'pretty2')
    # The with-block closes the file even if a create_dataset call raises.
    with h5py.File(filename, mode='w') as h:
        for k, v in a.items():
            if k == 'ref':
                # There are some strange characters in the comments of the
                # wgfa files, so each line is encoded explicitly.
                # Building the array from the already-encoded lines lets NumPy
                # size the items to the longest line. Pre-allocating with a
                # bare bytes dtype gives 1-byte items and truncates every line.
                encoded = [safe_str(line) for line in v]
                h.create_dataset(k, data=np.array(encoded, dtype=np.bytes_))
            elif k in string_keys:
                # np.bytes_ is the same type as the np.string_ alias, which
                # was removed in NumPy 2.0.
                h.create_dataset(k, data=np.array(v, dtype=np.bytes_))
            else:
                h.create_dataset(k, data=np.array(v))
    return

!rm test.h5
# NOTE(review): this prints every item of the dictionary, which can be a very
# large amount of notebook output.
print(a.items())
# Save the dictionary obtained from wgfaRead() to test.h5.
wgfa2h5(a,'test.h5')

rm: test.h5: No such file or directory


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Let's try reading in the HDF5 file

In [18]:
# Read every dataset of test.h5 back into a plain dictionary keyed by dataset name.
b = {}
# The with-block closes the file even if reading a dataset fails.
with h5py.File('test.h5', mode='r') as h:
    for k in h.keys():
        # Dataset.value was deprecated and then removed in h5py 3.0;
        # indexing with an empty tuple reads the whole dataset instead.
        b[k] = h[k][()]

# NOTE: wgfaRead() time is 3 s, hdf5 read time is 8 ms

In [20]:
# Root directory of the database, taken from the XUVTOP environment variable.
base = os.environ['XUVTOP']
# Sorted array of every entry directly under the database root.
atoms = np.sort(glob.glob(os.environ['XUVTOP'] + '/*'))

### The following block is code to convert all wgfa files to HDF5.

In [21]:
# Convert all wgfa files into HDF5.
# For every entry under the database root, look for ion directories whose name
# starts with the entry name, read each *.wgfa file found there, and write it
# next to the source as <ion>/<ion>.h5.
for atom in atoms:
    astr = os.path.basename(atom)
    ions = glob.glob(os.path.join(base, astr, astr + '*'))
    for ion in ions:
        istr = os.path.basename(ion)
        wfiles = glob.glob(os.path.join(base, astr, istr, '*.wgfa'))
        for w in wfiles:
            wgfa = io.wgfaRead(istr, filename=w, total=True)
            # Build the output path from the database root instead of a
            # hard-coded absolute path, so the conversion follows XUVTOP.
            filename = os.path.join(base, istr.split('_')[0], istr, istr + '.h5')
            wgfa2h5(wgfa, filename)
            print('Wrote out',filename)


Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_13/al_13.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_12/al_12.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_9/al_9.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_7/al_7.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_6/al_6.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_8/al_8.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_10/al_10.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_12d/al_12d.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_11/al_11.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_3/al_3.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_4/al_4.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_5/al_5.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/al/al_2/al_2.h5
Wrote out /Users/cheung/python/ChiantiPy-master/dbase/ar/ar_10/ar_10.h5
Wrote 